This tutorial covers the basic population genetics analyses for a set of populations (or closely related species)
1) Evaluating, filtering and plotting the genetic data
2) Test for Hardy-Weinberg-Equilibrium (HWE) and linkage between loci
3) Calculating, assessing and plotting genotypic richness and diversity, heterozygosity, F-statistics, and isolation-by-distance (IBD)
4) Performing and plotting principal component analyses (PCA) and discriminant analyses of principal components (DAPC)
5) Creating maps based on the DAPC results

As input is required (see Import Data paragraph below)
1) a vcf file with the genetic data
2) a dataset containing individual IDs, longitude, latitude and population/species assignment for each sample/individual

The tutorial uses an example dataset of unpublished ddRAD data (164 individiduals of 8 species) from the Hemileuca maia buck moth species group (Saturniidae: Lepidoptera) distributed throughout the US and southern Canada.

1) Evaluate data

Install and load required packages
install_if_missing <- function(pkg) { #create function to check and install required packages
  if (!require(pkg, character.only = T)) {
    install.packages(pkg, dependencies = T)
    library(pkg, character.only = T)}}  
install_if_missing("ggplot2") 
install_if_missing("viridis") 
install_if_missing("dplyr") 
install_if_missing("adegenet") 
install_if_missing("plotly") 
install_if_missing("hierfstat")
install_if_missing("pegas")
install_if_missing("vcfR")
install_if_missing("poppr")
install_if_missing("reshape2")
install_if_missing("geosphere")
install_if_missing("stringr")
install_if_missing("mmod")
Import data
rm(list = ls()) #clear environment

setwd("C:/Users/danie/Desktop/PhD research/Pop data/") #set working directory (containing vcf and datafile)
dataset <- read.table(file = "C:/Users/danie/Desktop/PhD research/Pop data/dataset.txt", sep = "\t", header = T, stringsAsFactors = T) #import dataset containing Individual ID, Species, longitude and Latitude
pop_dataset <- vcfR2genind(read.vcfR("C:/Users/danie/Desktop/PhD research/Pop data/pop_dataset.vcf")) #import vcf files and convert it to genind object
Set population and individual IDs
population_assignment <- dataset$Species #set population assignment
population_assignment <- factor(population_assignment, levels = unique(population_assignment))
pop_dataset$pop <- droplevels(as.factor(population_assignment))
indNames(pop_dataset) <- dataset$ID #assign individual names using ID label
Evaluate data
head(dataset) #check dataset
##                       ID Latitude Longitude      Species
## 1   H.m.sandra_AL_Hmg344 33.44000 -86.61000       H.maia
## 2   H.m.sandra_AL_Hmg353 33.44000 -86.61000       H.maia
## 3   H.m.sandra_AL_Hmg358 33.44000 -86.61000       H.maia
## 4 H.latifascia_IA_Hmg315 41.92489 -95.99636 H.latifascia
## 5 H.latifascia_IA_Hmg321 41.92489 -95.99636 H.latifascia
## 6   H.m.sandra_KS_Hmg015 39.09000 -96.58000       H.maia
unique(pop_dataset$pop) #show populations
## [1] H.maia                 H.latifascia           H.lucina              
## [4] H.maia.x.lucina        H.nevadensis           H.sp.GreatLakesComplex
## [7] H.slosseri             H.peigleri            
## 8 Levels: H.maia H.latifascia H.lucina H.maia.x.lucina ... H.peigleri
nLoc(pop_dataset) #show number of loci
## [1] 549
poppr(pop_dataset) #show basic summary table, including number of individuals (N) and genotypes (MLG) per population, Shannon-Wiener index (H; Shannon 2001, genotype diversity), Stoddart and Taylor's (1988) G index (G, genotype diversity) and Nei’s unbiased gene diversity (Nei 1978; Hexp)
##                      Pop   N MLG eMLG       SE    H   G lambda E.5     Hexp
## 1                 H.maia  75  75   10 0.00e+00 4.32  75  0.987   1 0.008531
## 2           H.latifascia  15  15   10 2.77e-07 2.71  15  0.933   1 0.001636
## 3               H.lucina  10  10   10 0.00e+00 2.30  10  0.900   1 0.000639
## 4        H.maia.x.lucina   1   1    1 0.00e+00 0.00   1  0.000 NaN 0.000000
## 5           H.nevadensis  22  22   10 0.00e+00 3.09  22  0.955   1 0.000351
## 6 H.sp.GreatLakesComplex  10  10   10 0.00e+00 2.30  10  0.900   1 0.000000
## 7             H.slosseri  19  19   10 2.51e-07 2.94  19  0.947   1 0.000968
## 8             H.peigleri  12  12   10 0.00e+00 2.48  12  0.917   1 0.003906
## 9                  Total 164 164   10 0.00e+00 5.10 164  0.994   1 0.028232
##     Ia  rbarD        File
## 1 3.95 0.0153 pop_dataset
## 2 3.32 0.0253 pop_dataset
## 3 2.54 0.0352 pop_dataset
## 4   NA     NA pop_dataset
## 5 2.59 0.0642 pop_dataset
## 6 2.85 0.0275 pop_dataset
## 7 2.98 0.0227 pop_dataset
## 8 3.14 0.0258 pop_dataset
## 9 3.57 0.0103 pop_dataset
private_alleles(pop_dataset) %>% apply(MARGIN = 1, FUN = sum) #show number of private alleles (alleles found exclusively in one population) per site across all loci for each population
##                 H.maia           H.latifascia               H.lucina 
##                    473                     74                     31 
##        H.maia.x.lucina           H.nevadensis H.sp.GreatLakesComplex 
##                      0                    241                     44 
##             H.slosseri             H.peigleri 
##                     69                     29
Filter data
missing_data <- poppr::info_table(pop_dataset, type = "missing", plot = T) #plot missing data across loci and populations

round(missing_data[1:length(unique(pop_dataset$pop)), 1:4], 2) #show proportion of missing data across populations for first four loci
##                         Locus
## Population               Chromosome24_358443 Chromosome24_358452
##   H.maia                                0.08                0.05
##   H.latifascia                          0.07                0.07
##   H.lucina                              0.20                0.20
##   H.maia.x.lucina                       1.00                1.00
##   H.nevadensis                          0.09                0.09
##   H.sp.GreatLakesComplex                0.30                0.30
##   H.slosseri                            0.11                0.11
##   H.peigleri                            0.08                0.08
##                         Locus
## Population               Chromosome24_358457 Chromosome24_358458
##   H.maia                                0.05                0.08
##   H.latifascia                          0.07                0.07
##   H.lucina                              0.20                0.20
##   H.maia.x.lucina                       1.00                1.00
##   H.nevadensis                          0.09                0.09
##   H.sp.GreatLakesComplex                0.30                0.30
##   H.slosseri                            0.11                0.11
##   H.peigleri                            0.08                0.08
pop_dataset <- poppr::informloci(pop_dataset, 
                                 MAF = 0.01, #only retain loci with at least 1% of the minor allele (minor allele frequency cutoff to 5%)
                                 cutoff = 2 / nInd(pop_dataset)) #only loci with at least (2 / total number of individuals) differentiating genotypes are retained (a locus must have at least (2 / total number of individuals) of its genotypes be different from each other to be considered useful and being retained to ensure that only loci with enough variation are kept)
## cutoff value: 1.21951219512195 % ( 2 samples ).
## MAF         : 0.01
## 
##  Found 283 uninformative loci 
##  ============================ 
##  0 loci found with a cutoff of 2 samples   
##  283 loci found with MAF < 0.01 :
##  Chromosome24_358457, Chromosome24_358480, Chromosome24_358514,
## Chromosome29_216196, Chromosome29_216204, Chromosome29_216217,
## Chromosome29_216223, Chromosome29_216254, Chromosome30i_1475928,
## Chromosome30i_1475944, Chromosome30i_1476002, Chromosome30i_2476518,
## Chromosome30i_2476546, ctg00000001_1_7828523, ctg00000001_1_7828537,
## ctg00000001_1_8964629, ctg00000001_1_9453151, ctg00000001_1_9453152,
## ctg00000001_1_9453153, ctg00000001_1_9453217, ctg00000001_1_9453218,
## ctg00000001_1_10554521, ctg00000001_1_10554544, ctg00000001_1_12408856,
## ctg00000001_1_12408858, ctg00000001_1_12408871, ctg00000001_1_12416218,
## ctg00000001_1_12416246, ctg00000001_1_14226152, ctg00000001_1_14226160,
## ctg00000001_1_14226167, ctg00000001_1_14226184, ctg00000002_1_849307,
## ctg00000002_1_849313, ctg00000002_1_849315, ctg00000002_1_849338,
## ctg00000002_1_849341, ctg00000002_1_4063952, ctg00000002_1_4063959,
## ctg00000002_1_4063965, ctg00000002_1_4063968, ctg00000002_1_4063978,
## ctg00000002_1_4063989, ctg00000003_1_1051579, ctg00000003_1_1051586,
## ctg00000003_1_1051639, ctg00000003_1_1051658, ctg00000003_1_2424154,
## ctg00000003_1_2424211, ctg00000003_1_2424227, ctg00000003_1_2808819,
## ctg00000003_1_2808825, ctg00000003_1_2808888, ctg00000003_1_2808891,
## ctg00000003_1_2808897, ctg00000003_1_4110209, ctg00000003_1_4110212,
## ctg00000003_1_4110248, ctg00000003_1_4904340, ctg00000003_1_4904341,
## ctg00000003_1_4904342, ctg00000003_1_4904347, ctg00000003_1_4904381,
## ctg00000003_1_7667629, ctg00000003_1_7667678, ctg00000003_1_7667703,
## ctg00000003_1_10993711, ctg00000003_1_10993746, ctg00000003_1_10993752,
## ctg00000003_1_10993758, ctg00000004_1_6353370, ctg00000004_1_6353382,
## ctg00000004_1_6353383, ctg00000004_1_6353415, ctg00000004_1_8507507,
## ctg00000004_1_8507523, ctg00000004_1_8507536, ctg00000004_1_8507549,
## ctg00000004_1_8507566, ctg00000004_1_17062426, ctg00000004_1_17062461,
## ctg00000004_1_17062467, ctg00000005_1_39788, ctg00000005_1_39819,
## ctg00000005_1_39825, ctg00000005_1_39851, ctg00000005_1_2854486,
## ctg00000005_1_2854504, ctg00000005_1_2854560, ctg00000005_1_12161398,
## ctg00000005_1_12161428, ctg00000005_1_13627665, ctg00000005_1_13775183,
## ctg00000005_1_13775215, ctg00000005_1_13775226, ctg00000005_1_16183129,
## ctg00000005_1_16183132, ctg00000005_1_16183163, ctg00000005_1_16183216,
## ctg00000005_1_16585441, ctg00000005_1_16762194, ctg00000005_1_16762196,
## ctg00000005_1_16762204, ctg00000005_1_16762221, ctg00000005_1_16762237,
## ctg00000005_1_16762239, ctg00000005_1_16762272, ctg00000005_1_16762275,
## ctg00000005_1_16762281, ctg00000006_1_10939176, ctg00000006_1_10939207,
## ctg00000006_1_11253046, ctg00000006_1_17071611, ctg00000006_1_17071687,
## ctg00000007_1_880357, ctg00000007_1_880399, ctg00000007_1_880407,
## ctg00000007_1_880409, ctg00000007_1_880411, ctg00000007_1_880417,
## ctg00000007_1_880419, ctg00000007_1_2340133, ctg00000007_1_2340174,
## ctg00000007_1_2340176, ctg00000007_1_2340178, ctg00000007_1_2340179,
## ctg00000007_1_4686069, ctg00000007_1_9432358, ctg00000007_1_9432369,
## ctg00000007_1_9432370, ctg00000007_1_9432382, ctg00000008_1_1329324,
## ctg00000008_1_1329325, ctg00000008_1_1329373, ctg00000008_1_1329381,
## ctg00000008_1_12670593, ctg00000008_1_12670604, ctg00000008_1_12670614,
## ctg00000008_1_12670616, ctg00000008_1_14150999, ctg00000008_1_14151004,
## ctg00000008_1_14693202, ctg00000008_1_14693220, ctg00000008_1_14693228,
## ctg00000008_1_14693229, ctg00000008_1_14693264, ctg00000009_1_2742839,
## ctg00000009_1_2742850, ctg00000009_1_2742854, ctg00000009_1_2742868,
## ctg00000010_1_2259213, ctg00000010_1_2259232, ctg00000010_1_2259287,
## ctg00000010_1_3393151, ctg00000010_1_3393188, ctg00000010_1_3393199,
## ctg00000011_1_1511900, ctg00000011_1_1511916, ctg00000011_1_1511963,
## ctg00000011_1_15160649, ctg00000011_1_15160659, ctg00000011_1_15160692,
## ctg00000012_1_377654, ctg00000012_1_377672, ctg00000012_1_377698,
## ctg00000012_1_377699, ctg00000012_1_839965, ctg00000012_1_839974,
## ctg00000012_1_3950277, ctg00000012_1_13095605, ctg00000012_1_13095621,
## ctg00000012_1_13095632, ctg00000012_1_13095677, ctg00000012_1_13095689,
## ctg00000013_1_1138550, ctg00000013_1_1138570, ctg00000013_1_1138589,
## ctg00000013_1_1138599, ctg00000013_1_1138608, ctg00000013_1_1138613,
## ctg00000013_1_1138616, ctg00000013_1_1545622, ctg00000013_1_1545627,
## ctg00000013_1_1545675, ctg00000013_1_6142190, ctg00000013_1_8398605,
## ctg00000013_1_8398611, ctg00000013_1_8398633, ctg00000013_1_8398640,
## ctg00000013_1_8398652, ctg00000013_1_8398654, ctg00000013_1_8398661,
## ctg00000013_1_14156355, ctg00000018_1_4211436, ctg00000018_1_4211482,
## ctg00000018_1_4211483, ctg00000018_1_4211495, ctg00000018_1_4211510,
## ctg00000018_1_9034832, ctg00000018_1_9034843, ctg00000018_1_9034865,
## ctg00000018_1_9034878, ctg00000018_1_9034884, ctg00000018_1_10289957,
## ctg00000018_1_10748167, ctg00000018_1_10748198, ctg00000018_1_10748212,
## ctg00000018_1_10748215, ctg00000018_1_10748221, ctg00000018_1_10958065,
## ctg00000018_1_10958078, ctg00000018_1_12473010, ctg00000019_1_6928392,
## ctg00000019_1_6928396, ctg00000019_1_6928397, ctg00000019_1_6928399,
## ctg00000019_1_6928401, ctg00000019_1_6928411, ctg00000019_1_12304438,
## ctg00000019_1_12304439, ctg00000019_1_12304447, ctg00000019_1_12304491,
## ctg00000019_1_12304498, ctg00000020_1_5092275, ctg00000020_1_5092296,
## ctg00000020_1_5092311, ctg00000020_1_5092314, ctg00000020_1_5092336,
## ctg00000020_1_11841819, ctg00000020_1_11841837, ctg00000020_1_11841861,
## ctg00000021_1_11026161, ctg00000021_1_11026203, ctg00000022_1_4076414,
## ctg00000022_1_4076432, ctg00000022_1_4076444, ctg00000022_1_4076448,
## ctg00000022_1_4076453, ctg00000022_1_9093691, ctg00000022_1_9093733,
## ctg00000022_1_9093735, ctg00000023_1_8781750, ctg00000023_1_8781762,
## ctg00000023_1_8781765, ctg00000023_1_8781807, ctg00000023_1_8781813,
## ctg00000027_1_1234792, ctg00000027_1_1234800, ctg00000027_1_1234804,
## ctg00000027_1_1234807, ctg00000027_1_1234808, ctg00000027_1_1234811,
## ctg00000027_1_6873532, ctg00000027_1_6873559, ctg00000027_1_6873581,
## ctg00000028_1_7039670, ctg00000028_1_7039685, ctg00000028_1_7039704,
## ctg00000028_1_7039715, ctg00000028_1_7039740, ctg00000029_1_645349,
## ctg00000029_1_645366, ctg00000029_1_2812162, ctg00000029_1_2812214,
## ctg00000029_1_2812231, ctg00000029_1_2812241, ctg00000029_1_2812243,
## ctg00000029_1_2812248, ctg00000029_1_4340856, ctg00000029_1_4340866,
## ctg00000029_1_4340916, ctg00000029_1_4340931, ctg00000034_1_3304170,
## ctg00000034_1_3304171, ctg00000034_1_3304195, ctg00000034_1_3304216,
## locus_619310_20, locus_619310_26, locus_619310_45, locus_619310_55,
## locus_619310_82, locus_619310_87, locus_619310_90
pop_dataset <- pop_dataset %>% missingno("loci", cutoff = 0.25) #remove loci with average missing data higher than 25%
## 
## Found 15851 missing values.
## 
## 2 loci contained missing values greater than 25%
## 
## Removing 2 loci: ctg00000009_1_2742855, ctg00000011_1_1511894
#pop_dataset <- pop_dataset %>% missingno("geno", cutoff = 0.3) #remove individuals with more than 30% missing genotypes
Reevaluate data after filtering
poppr(pop_dataset, quiet  = T) #show basic summary table, including number of individuals (N) and genotypes (MLG) per population, Shannon-Wiener index (H; genotype diversity), Stoddart and Taylor's G index (G, genotype diversity) and expected heterozygosity (Hexp) per population
##                      Pop   N MLG eMLG       SE    H   G lambda E.5     Hexp
## 1                 H.maia  75  75   10 0.00e+00 4.32  75  0.987   1 0.010682
## 2           H.latifascia  15  15   10 2.77e-07 2.71  15  0.933   1 0.003361
## 3               H.lucina  10  10   10 0.00e+00 2.30  10  0.900   1 0.001372
## 4        H.maia.x.lucina   1   1    1 0.00e+00 0.00   1  0.000 NaN 0.000000
## 5           H.nevadensis  22  22   10 0.00e+00 3.09  22  0.955   1 0.000723
## 6 H.sp.GreatLakesComplex  10  10   10 0.00e+00 2.30  10  0.900   1 0.000000
## 7             H.slosseri  19  19   10 2.51e-07 2.94  19  0.947   1 0.001984
## 8             H.peigleri  12  12   10 0.00e+00 2.48  12  0.917   1 0.008333
## 9                  Total 164 164   10 0.00e+00 5.10 164  0.994   1 0.020394
##     Ia  rbarD        File
## 1 3.98 0.0235 pop_dataset
## 2 3.08 0.0324 pop_dataset
## 3 2.45 0.0381 pop_dataset
## 4   NA     NA pop_dataset
## 5 2.31 0.0744 pop_dataset
## 6 3.23 0.0417 pop_dataset
## 7 2.90 0.0283 pop_dataset
## 8 3.35 0.0334 pop_dataset
## 9 3.73 0.0176 pop_dataset
missing_data <- poppr::info_table(pop_dataset, type = "missing", plot = T) #plot missing data across loci and populations

round(missing_data[1:length(unique(pop_dataset$pop)), 1:4], 2) #show proportion of missing data across populations for first four loci
##                         Locus
## Population               Chromosome24_358443 Chromosome24_358452
##   H.maia                                0.08                0.05
##   H.latifascia                          0.07                0.07
##   H.lucina                              0.20                0.20
##   H.maia.x.lucina                       1.00                1.00
##   H.nevadensis                          0.09                0.09
##   H.sp.GreatLakesComplex                0.30                0.30
##   H.slosseri                            0.11                0.11
##   H.peigleri                            0.08                0.08
##                         Locus
## Population               Chromosome24_358458 Chromosome24_358474
##   H.maia                                0.08                0.05
##   H.latifascia                          0.07                0.07
##   H.lucina                              0.20                0.20
##   H.maia.x.lucina                       1.00                1.00
##   H.nevadensis                          0.09                0.09
##   H.sp.GreatLakesComplex                0.30                0.30
##   H.slosseri                            0.11                0.11
##   H.peigleri                            0.08                0.08
nLoc(pop_dataset) #show number of loci
## [1] 264
private_alleles(pop_dataset) %>% apply(MARGIN = 1, FUN = sum) #show number of private alleles (alleles found exclusively in one population) per site across all loci for each population
##                 H.maia           H.latifascia               H.lucina 
##                    289                     25                     17 
##        H.maia.x.lucina           H.nevadensis H.sp.GreatLakesComplex 
##                      0                    222                     11 
##             H.slosseri             H.peigleri 
##                     36                      7
plot(summary(pop_dataset)$n.by.pop, summary(pop_dataset)$pop.n.all, 
     xlab = "Number of indiduals", ylab = "Number of alleles", 
     main = "Alleles numbers and sample sizes", type = "n")
text(summary(pop_dataset)$n.by.pop, summary(pop_dataset)$pop.n.all,
     lab = names(summary(pop_dataset)$n.by.pop))

barplot(summary(pop_dataset)$Hexp - summary(pop_dataset)$Hobs, 
        main = "Heterozygosity: expected-observed", ylab = "Hexp - Hobs")

barplot(summary(pop_dataset)$n.by.pop, 
        main = "Sample sizes per population", ylab = "Number of genotypes", las = 3)

tail(sort(round(summary(pop_dataset)$Hobs, 2))) #show loci with highest observed heterozygosity
## ctg00000004_1_8507570 ctg00000007_1_4686089 ctg00000004_1_8507573 
##                  0.33                  0.34                  0.35 
## ctg00000018_1_4211457 ctg00000019_1_6928341 ctg00000029_1_4340858 
##                  0.35                  0.35                  0.39
Assess genotypic richness and diversity for each population

Diversity indices incorporate both genotypic richness and abundance.

round(((poppr(pop_dataset))$eMLG), 2) #show genotypic richness accounting for sample size differences (eMLG) via rarefaction
## [1] 10 10 10  1 10 10 10 10 10
round(((poppr(pop_dataset))$lambda), 2) #show Simpson's index lambda (Simpson 1949; measure of genotypic diversity: estimation of probability that two randomly selected genotypes are different scaling from 0 with no genotypes are different to 1 so that all genotypes are different)
## [1] 0.99 0.93 0.90 0.00 0.95 0.90 0.95 0.92 0.99
Corr_Simp_ind <- round((((poppr(pop_dataset, quiet  = T))$N / ((poppr(pop_dataset, quiet  = T))$N - 1)) * (poppr(pop_dataset, quiet  = T))$lambda), 2) #calculate sample-size-corrected Simpson's index
data.frame(Population = levels(pop_dataset$pop), Value = Corr_Simp_ind[1:length(levels(pop_dataset$pop))], stringsAsFactors = F) #print corrected Simpson's index
##               Population Value
## 1                 H.maia     1
## 2           H.latifascia     1
## 3               H.lucina     1
## 4        H.maia.x.lucina   NaN
## 5           H.nevadensis     1
## 6 H.sp.GreatLakesComplex     1
## 7             H.slosseri     1
## 8             H.peigleri     1
Assess evenness for each population

Eveness is a measure of distribution of genotype abundances so that a population with equally abundant genotypes yields value equal to 1 and a population dominated by single genotype is closer to zero.

round(((poppr(pop_dataset))$E.5), 2) #show evenness E5 (Pielou 1975, Ludwig & Reynolds 1988, GrĂĽnwald et al. 2003)
## [1]   1   1   1 NaN   1   1   1   1   1

2) Basic population analyses

Test if mean observed heterozygosity is significantly lower than mean expected heterozygosity

A significant p-value indicates lower observed heterozygosity

if ((bartlett.test(list(summary(pop_dataset)$Hexp, summary(pop_dataset)$Hobs)))$p.value < 0.05) { #test for homogeneity of variances using Bartlett test
  var_equal <- T  #variances are significantly different
} else {var_equal <- F}  #variances are not significantly different
t.test(summary(pop_dataset)$Hexp, summary(pop_dataset)$Hobs, #perform t-test
       paired = T, var.equal = var_equal, alternative = "greater")
## 
##  Paired t-test
## 
## data:  summary(pop_dataset)$Hexp and summary(pop_dataset)$Hobs
## t = 11.445, df = 263, p-value < 2.2e-16
## alternative hypothesis: true mean difference is greater than 0
## 95 percent confidence interval:
##  0.04159992        Inf
## sample estimates:
## mean difference 
##      0.04861071
Test for Hardy-Weinberg-Equilibrium for each population and plot results
hwe_results <- lapply(seppop(pop_dataset), hw.test, B = 0)
## Warning in hw.test.loci(x = x, B = B, ...): The following loci were ignored: Chromosome24_358443
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: Chromosome24_358452
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: Chromosome24_358458
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: Chromosome24_358474
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: Chromosome24_358511
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_7828507
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_7828542
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_7828546
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_9453136
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_9453145
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_9453161
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_9453188
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12408829
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12408841
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12408872
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12408878
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12408879
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12416200
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12416221
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12416241
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12416260
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12416269
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000003_1_2808866
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000003_1_2808894
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_39811
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_39837
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_39838
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_39844
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_39863
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627657
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627661
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627687
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627702
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627734
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627737
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13775219
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_16183149
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_16183220
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_16183223
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000006_1_17071610
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000006_1_17071669
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000006_1_17071679
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000006_1_17071700
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000007_1_2340206
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000007_1_9432361
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000007_1_9432417
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000008_1_1329350
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000008_1_1329379
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000008_1_14693223
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000008_1_14693269
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000012_1_377648
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000012_1_377650
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000012_1_377659
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000012_1_377687
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000012_1_377693
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000013_1_6142160
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_4211444
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_4211457
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_4211466
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_4211478
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_4211511
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_10748197
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_10748239
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_10748240
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_10748245
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_12472987
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_12473001
## (not the same ploidy for all individuals, or too many missing data)The follo
hwe_pvalues_df <- melt(sapply(hwe_results, "[", i = T, j = 3)) #extract p-values from each population's test results and convert matrix to data frame for ggplot
colnames(hwe_pvalues_df) <- c("Locus", "Population", "P_value")
hwe_pvalues_df$P_value_adj <- p.adjust(hwe_pvalues_df$P_value, method = "fdr") #perform FDR correction
hwe_pvalues_df$Significant <- hwe_pvalues_df$P_value_adj < 0.05 #create significance column based on adjusted p-values
ggplot(hwe_pvalues_df, aes(x = Locus, y = Population, fill = P_value)) + #plot full heatmap of P-values
  geom_tile(color = "white") +
  scale_fill_viridis_c(option = "mako", name = "P-value") +
  labs(x = "Locus", y = "Population", 
       title = "HWE p-values across loci and populations") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 8),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        plot.title = element_text(hjust = 0.5),
        plot.margin = margin(0, 0, 0, 0))

ggplot(hwe_pvalues_df, aes(x = Locus, y = Population)) + #plot significant p-values
  geom_tile(aes(fill = Significant), color = "white") + #highlight only significant p-values
  scale_fill_manual(values = c("white", "red"), name = "Significant") + #grey for non-significant, red for significant
  labs(x = "Locus", y = "Population", 
       title = "Significant HWE p-values (FDR Corrected)") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 8),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        plot.title = element_text(hjust = 0.5),
        plot.margin = margin(0, 0, 0, 0))

Calculate and plot observed and expected heterozygosity for each population
calculate_heterozygosity <- function(genind_data) { #create function to calculate gene diversity within each population
  if (!inherits(genind_data, "genind")) {
    stop("Input must be a genind object")}
  populations <- levels(pop(genind_data))
  observed_heterozygosity <- numeric(length(populations))
  expected_heterozygosity <- numeric(length(populations))
  for (i in seq_along(populations)) {
    pop <- populations[i]
    pop_data <- genind_data[pop(genind_data) == pop] #subset genind object by population
    if (nInd(pop_data) == 0) {
      warning(paste("No data for population:", pop))
      next}
    tryCatch({
      summary_stats <- summary(pop_data) #check if Hobs and Hexp are present before computing means
      if (!is.null(summary_stats$Hobs)) {
        observed_heterozygosity[i] <- mean(summary_stats$Hobs, na.rm = T)}
      if (!is.null(summary_stats$Hexp)) {
        expected_heterozygosity[i] <- mean(summary_stats$Hexp, na.rm = T)}
    }, error = function(e) {
      warning(paste("Error processing population:", pop, "Details:", e$message))})}
  heterozygosity_df <- data.frame(
    Population = populations,
    Observed = round(observed_heterozygosity, 2),
    Expected = round(expected_heterozygosity, 2))
  print(heterozygosity_df) #print results
  return(heterozygosity_df)} #return heterozygosity data frame
plot_heterozygosity <- function(heterozygosity_df) { #create function for plotting
  heterozygosity_long <- reshape2::melt(heterozygosity_df, id.vars = "Population", 
                                        variable.name = "Type", value.name = "Heterozygosity") #melt data frame for plotting
  ggplot(heterozygosity_long, aes(x = Population, y = Heterozygosity, fill = Type)) + 
    geom_bar(stat = "identity", position = position_dodge(width = 0.8)) + 
    labs(x = "Population", y = "Heterozygosity", fill = "Type") + 
    theme_classic() + 
    scale_fill_manual(values = c("Observed" = "darkblue", "Expected" = "lightblue"))} #set fill colors
heterozygosity_df <- calculate_heterozygosity(pop_dataset) #calculate heterozygosity
##               Population Observed Expected
## 1                 H.maia     0.08     0.12
## 2           H.latifascia     0.09     0.09
## 3               H.lucina     0.07     0.08
## 4        H.maia.x.lucina     0.09     0.31
## 5           H.nevadensis     0.02     0.04
## 6 H.sp.GreatLakesComplex     0.08     0.09
## 7             H.slosseri     0.08     0.10
## 8             H.peigleri     0.08     0.09
plot_heterozygosity(heterozygosity_df) #plot heterozygosity

3) Assess and test for population structure

F-statistics (Weir & Cockerham 1984)
  • Fst: Measures genetic differentiation among subpopulations (pop/tot)
  • Fis: Measures inbreeding coefficient within subpopulations (ind/pop)
  • Fit: Measures overall inbreeding coefficient by combining both within and among subpopulation inbreeding
pop_dataset_loci <- pegas::Fst(pegas::as.loci(pop_dataset))
head(round(pop_dataset_loci, 2)) #per-locus F-statistics
##                       Fit   Fst   Fis
## Chromosome24_358443  0.23  0.13  0.12
## Chromosome24_358452 -0.01  0.00 -0.01
## Chromosome24_358458  0.32  0.25  0.09
## Chromosome24_358474  0.69  0.23  0.59
## Chromosome24_358511  0.67  0.09  0.64
## Chromosome29_216180 -0.01 -0.02  0.01
round(colMeans(pop_dataset_loci[, !is.na(colMeans(pop_dataset_loci, na.rm = T)) & !is.nan(colMeans(pop_dataset_loci, na.rm = T))], na.rm=T), 2) #global F-statistics (take into account all genotypes and loci but ignore loci with NA)
##  Fit  Fst  Fis 
## 0.36 0.13 0.27
round(boot.vc(genind2hierfstat(pop_dataset)[1], genind2hierfstat(pop_dataset)[-1], #calculate CIs of F-statistics (H-Total: total expected heterozygosity, F-pop/Total = Fst, F-Ind/Total = Fit, H-pop: expected heterozygosity within populations, F-Ind/pop: Fis, Hobs: observed heterozygosity) ot = 100)$ci, 2) 
              nboot = 5000)$ci, 2) #specify number of bootstraps
##       H-Total F-pop/Total F-Ind/Total H-pop F-Ind/pop Hobs
## 2.5%     0.11        0.19        0.40  0.09      0.24 0.06
## 50%      0.13        0.23        0.44  0.10      0.27 0.07
## 97.5%    0.15        0.28        0.48  0.11      0.31 0.08
Compute and plot measures of genetic differentiation for populations
pairwise_gen_dist_matrix <- function(pop_dataset, genetic_diff_measure, viridis_col) {
  distance_matrix <- switch(genetic_diff_measure,
                            "D_Jost" = as.matrix(mmod::pairwise_D(pop_dataset, linearized = F)), #calculate Jost's D (2008)
                            "Gst_Hedrick" = as.matrix(mmod::pairwise_Gst_Hedrick(pop_dataset, linearized = F)), #calculate Hedrick's G'st (2005)
                            "Fst" = {fst_results <- StAMPP::stamppFst(StAMPP::stamppConvert(dartR::gi2gl(pop_dataset, verbose = 0), type = "genlight"), nboots = 10000, percent = 95, nclusters = 1) #calculate Fst
                            fst_values <- as.matrix(fst_results$Fsts) #extract Fst results
                            p_values <- as.matrix(fst_results$Pvalues) #extract p-values
                            list(values = fst_values, p_values = p_values)}, #return Fst and p-values
                            "Ds_Nei" = as.matrix(genet.dist(pop_dataset, method = "Ds")), #compute Nei's standard genetic distance Ds (Nei 1972)
                            stop("Unsupported genetic differentiation measure. Choose from: 'D_Jost', 'Gst_Hedrick', 'Fst', 'Ds_Nei'."))
  if (genetic_diff_measure == "Fst") { #check if using Fst
    fst_values <- distance_matrix$values #extract Fst values
    p_values <- distance_matrix$p_values #extract p-values
    cat("p-values for Fst:\n")
    print(p_values) #print p-values
  } else {fst_values <- distance_matrix #use regular distance matrix for other measures
  p_values <- NULL} #use no p-values for other measures
  fst_values[fst_values < 0] <- 0 #replace negative values in distance matrix with 0
  
  label_list <- c("D_Jost" = "Jost’s D", "Gst_Hedrick" = "G’", "Fst" = "F", "Ds_Nei" = "G") #labels for each measure
  label <- label_list[[genetic_diff_measure]] #get label
  label_lowercase <- ifelse(genetic_diff_measure %in% c("Gst_Hedrick", "Fst", "Ds_Nei"), "ST", "") #set lowercase label
  cat(paste(label, "values:\n"))
  print(round(fst_values, 2)) #show Fst values rounded to 2 decimal places
  distance_matrix_lower <- fst_values #copy Fst values
  distance_matrix_lower[!(lower.tri(distance_matrix_lower, diag = F))] <- NA #mask upper triangle
  distance_matrix_long <- reshape2::melt(distance_matrix_lower, na.rm = T) #reshape to long format
  colnames(distance_matrix_long) <- c("Row", "Column", "Distance") #rename columns
  if (!is.null(p_values)) { #prepare p-value data for asterisks
    p_values_lower <- p_values #copy p-values
    p_values_lower[!(lower.tri(distance_matrix_lower, diag = F))] <- NA #mask upper triangle
    p_values_long <- reshape2::melt(p_values_lower, na.rm = T) #reshape p-values to long format
    colnames(p_values_long) <- c("Row", "Column", "P_value") #rename columns
    distance_matrix_long <- merge(distance_matrix_long, p_values_long, by = c("Row", "Column")) #merge distance and p-values
    distance_matrix_long$Significance <- ifelse(p_values_long$P_value <= (0.05 / length(unique(pop_dataset$pop)) * (length(unique(pop_dataset$pop)) - 1) / 2), "*", "")} #add asterisks for significant comparisons after Bonferroni correction
  
  if (genetic_diff_measure == "Fst") { #check if using Fst
    plot <- ggplot2::ggplot(distance_matrix_long, aes(x = Column, y = Row, fill = Distance)) + #create heatmap plot
      geom_tile() +
      geom_text(aes(label = Significance), size = 7, color = "#FFA500", na.rm = T) +
      scale_fill_viridis_c(option = viridis_col, direction = -1) + #apply viridis color scale
      theme_classic() +
      labs(x = "Population", y = "Population", fill = bquote(.(label)[.(label_lowercase)]))} #label axes and legend
  else {plot <- ggplot2::ggplot(distance_matrix_long, aes(x = Column, y = Row, fill = Distance)) + #create heatmap plot
    geom_tile() +
    scale_fill_viridis_c(option = viridis_col, direction = -1) + #apply viridis color scale
    theme_classic() +
    labs(x = "Population", y = "Population", fill = bquote(.(label)[.(label_lowercase)]))} #label axes and legend}
  print(plot)} #print plot
pairwise_gen_dist_matrix(pop_dataset, genetic_diff_measure = "Fst", viridis_col = "mako") #compute and plot Fst (Weir and Cockerham 1984)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## Registered S3 method overwritten by 'genetics':
##   method      from 
##   [.haplotype pegas
## Starting gl.compliance.check 
##   Processing genlight object with SNP data
##   The slot loc.all, which stores allele name for each locus, is empty. 
## Creating a dummy variable (A/C) to insert in this slot. 
##   Checking coding of SNPs
##     SNP data scored NA, 0, 1 or 2 confirmed
##   Checking locus metrics and flags
##   Recalculating locus metrics
##   Checking for monomorphic loci
##     No monomorphic loci detected
##   Checking for loci with all missing data
##     No loci with all missing data detected
##   Checking whether individual names are unique.
##   Checking for individual metrics
##   Warning: Creating a slot for individual metrics
##   Checking for population assignments
##     Population assignments confirmed
##   Spelling of coordinates checked and changed if necessary to 
##             lat/lon
## Completed: gl.compliance.check 
## p-values for Fst:
##                        H.maia H.latifascia H.lucina H.maia.x.lucina
## H.maia                     NA           NA       NA              NA
## H.latifascia           0.0000           NA       NA              NA
## H.lucina               0.0000       0.0000       NA              NA
## H.maia.x.lucina        0.9252       0.7018   0.3052              NA
## H.nevadensis           0.0000       0.0000   0.0000          0.0000
## H.sp.GreatLakesComplex 0.0000       0.0000   0.0000          0.5395
## H.slosseri             0.0000       0.0000   0.0000          0.6456
## H.peigleri             0.0000       0.0000   0.0000          0.8438
##                        H.nevadensis H.sp.GreatLakesComplex H.slosseri
## H.maia                           NA                     NA         NA
## H.latifascia                     NA                     NA         NA
## H.lucina                         NA                     NA         NA
## H.maia.x.lucina                  NA                     NA         NA
## H.nevadensis                     NA                     NA         NA
## H.sp.GreatLakesComplex            0                     NA         NA
## H.slosseri                        0                      0         NA
## H.peigleri                        0                      0          0
##                        H.peigleri
## H.maia                         NA
## H.latifascia                   NA
## H.lucina                       NA
## H.maia.x.lucina                NA
## H.nevadensis                   NA
## H.sp.GreatLakesComplex         NA
## H.slosseri                     NA
## H.peigleri                     NA
## F values:
##                        H.maia H.latifascia H.lucina H.maia.x.lucina
## H.maia                     NA           NA       NA              NA
## H.latifascia             0.07           NA       NA              NA
## H.lucina                 0.14         0.20       NA              NA
## H.maia.x.lucina          0.00         0.00     0.03              NA
## H.nevadensis             0.42         0.52     0.63            0.69
## H.sp.GreatLakesComplex   0.07         0.10     0.23            0.00
## H.slosseri               0.11         0.10     0.22            0.00
## H.peigleri               0.08         0.13     0.21            0.00
##                        H.nevadensis H.sp.GreatLakesComplex H.slosseri
## H.maia                           NA                     NA         NA
## H.latifascia                     NA                     NA         NA
## H.lucina                         NA                     NA         NA
## H.maia.x.lucina                  NA                     NA         NA
## H.nevadensis                     NA                     NA         NA
## H.sp.GreatLakesComplex         0.60                     NA         NA
## H.slosseri                     0.55                   0.12         NA
## H.peigleri                     0.56                   0.14       0.07
##                        H.peigleri
## H.maia                         NA
## H.latifascia                   NA
## H.lucina                       NA
## H.maia.x.lucina                NA
## H.nevadensis                   NA
## H.sp.GreatLakesComplex         NA
## H.slosseri                     NA
## H.peigleri                     NA

pairwise_gen_dist_matrix(pop_dataset, genetic_diff_measure = "D_Jost", viridis_col = "mako") #compute and plot Jost´s D (Host 2008) 
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Jost’s D values:
##                        H.maia H.latifascia H.lucina H.maia.x.lucina
## H.maia                   0.00         0.01     0.02            0.00
## H.latifascia             0.01         0.00     0.03            0.00
## H.lucina                 0.02         0.03     0.00            0.01
## H.maia.x.lucina          0.00         0.00     0.01            0.00
## H.nevadensis             0.08         0.07     0.10            0.09
## H.sp.GreatLakesComplex   0.01         0.01     0.03            0.00
## H.slosseri               0.02         0.01     0.03            0.01
## H.peigleri               0.01         0.02     0.03            0.00
##                        H.nevadensis H.sp.GreatLakesComplex H.slosseri
## H.maia                         0.08                   0.01       0.02
## H.latifascia                   0.07                   0.01       0.01
## H.lucina                       0.10                   0.03       0.03
## H.maia.x.lucina                0.09                   0.00       0.01
## H.nevadensis                   0.00                   0.09       0.08
## H.sp.GreatLakesComplex         0.09                   0.00       0.02
## H.slosseri                     0.08                   0.02       0.00
## H.peigleri                     0.08                   0.02       0.01
##                        H.peigleri
## H.maia                       0.01
## H.latifascia                 0.02
## H.lucina                     0.03
## H.maia.x.lucina              0.00
## H.nevadensis                 0.08
## H.sp.GreatLakesComplex       0.02
## H.slosseri                   0.01
## H.peigleri                   0.00

pairwise_gen_dist_matrix(pop_dataset, genetic_diff_measure = "Gst_Hedrick", viridis_col = "mako") #compute and plot Hedrick's G'st (Hedrick 2005)
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## G’ values:
##                        H.maia H.latifascia H.lucina H.maia.x.lucina
## H.maia                   0.00         0.10     0.18            0.02
## H.latifascia             0.10         0.00     0.23            0.01
## H.lucina                 0.18         0.23     0.00            0.10
## H.maia.x.lucina          0.02         0.01     0.10            0.00
## H.nevadensis             0.51         0.51     0.62            0.63
## H.sp.GreatLakesComplex   0.10         0.12     0.25            0.04
## H.slosseri               0.13         0.12     0.24            0.07
## H.peigleri               0.11         0.14     0.23            0.01
##                        H.nevadensis H.sp.GreatLakesComplex H.slosseri
## H.maia                         0.51                   0.10       0.13
## H.latifascia                   0.51                   0.12       0.12
## H.lucina                       0.62                   0.25       0.24
## H.maia.x.lucina                0.63                   0.04       0.07
## H.nevadensis                   0.00                   0.59       0.56
## H.sp.GreatLakesComplex         0.59                   0.00       0.14
## H.slosseri                     0.56                   0.14       0.00
## H.peigleri                     0.55                   0.17       0.08
##                        H.peigleri
## H.maia                       0.11
## H.latifascia                 0.14
## H.lucina                     0.23
## H.maia.x.lucina              0.01
## H.nevadensis                 0.55
## H.sp.GreatLakesComplex       0.17
## H.slosseri                   0.08
## H.peigleri                   0.00

pairwise_gen_dist_matrix(pop_dataset, genetic_diff_measure = "Ds_Nei", viridis_col = "mako") #compute and plot Nei's standard genetic distance Ds (Nei 1972)
## G values:
##                        H.maia H.latifascia H.lucina H.maia.x.lucina
## H.maia                   0.00         0.01     0.03            0.03
## H.latifascia             0.01         0.00     0.03            0.04
## H.lucina                 0.03         0.03     0.00            0.04
## H.maia.x.lucina          0.03         0.04     0.04            0.00
## H.nevadensis             0.08         0.07     0.10            0.10
## H.sp.GreatLakesComplex   0.02         0.02     0.04            0.04
## H.slosseri               0.02         0.02     0.04            0.05
## H.peigleri               0.02         0.02     0.03            0.03
##                        H.nevadensis H.sp.GreatLakesComplex H.slosseri
## H.maia                         0.08                   0.02       0.02
## H.latifascia                   0.07                   0.02       0.02
## H.lucina                       0.10                   0.04       0.04
## H.maia.x.lucina                0.10                   0.04       0.05
## H.nevadensis                   0.00                   0.09       0.09
## H.sp.GreatLakesComplex         0.09                   0.00       0.02
## H.slosseri                     0.09                   0.02       0.00
## H.peigleri                     0.08                   0.03       0.01
##                        H.peigleri
## H.maia                       0.02
## H.latifascia                 0.02
## H.lucina                     0.03
## H.maia.x.lucina              0.03
## H.nevadensis                 0.08
## H.sp.GreatLakesComplex       0.03
## H.slosseri                   0.01
## H.peigleri                   0.00

Estimate individual inbreeding

Inbreeding represents the excess of homozygosity in an individual due to inheriting two identical alleles from recent common ancestor. It can lead to inbreeding depression so the associated loss of fitness, often due to recessive deleterious alleles that are more frequent than in the population.

pop_dataset_inbreeding_mean <- sapply(inbreeding(pop_dataset, N = 1000, M = 2000), mean) #compute mean inbreeding values for each individual (mean from the likelihood distribution of each individual)
hist(pop_dataset_inbreeding_mean, main = "Average inbreeding in individuals") #plot distribution of (mean) inbreeding values for each individual

round(tail(sort(pop_dataset_inbreeding_mean), n = 10), 2) #list ten individuals with highest mean inbreeding
##        H.m.sandra_NJ_Hmg113      H.nevadensis_NM_DN2530 
##                        0.69                        0.71 
##        H.nevadensis_NV_DR99   H.m.menyanthevora_NY_DR90 
##                        0.71                        0.72 
##       H.nevadensis_CA_X100b      H.nevadensis_CA_DN2444 
##                        0.73                        0.73 
## H.m.menyanthevora_NY_DN2504        H.nevadensis_CA_DR34 
##                        0.73                        0.74 
##      H.nevadensis_CA_DN2395 H.m.menyanthevora_NY_DN2469 
##                        0.75                        0.75
pop_dataset_inbreeding_df <- data.frame(ID = names(pop_dataset_inbreeding_mean), Inbreeding_Mean = pop_dataset_inbreeding_mean, Population = pop_dataset$pop)
ggplot(pop_dataset_inbreeding_df, aes(x = Inbreeding_Mean)) + #plot distribution of (mean) inbreeding values for each individual for each population
  geom_histogram(binwidth = 0.04, fill = "lightblue", color = "black", alpha = 0.8) +
  facet_wrap(~ Population, scales = "free_y") +
  labs(x = "Individual mean inbreeding value", y = "Frequency") +
  theme_classic() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

pop_dataset_inbreeding_df %>% group_by(Population) %>% top_n(5, Inbreeding_Mean) %>% arrange(Population, desc(Inbreeding_Mean)) %>% mutate(Inbreeding_Mean = round(Inbreeding_Mean, 2)) #list ten individuals with highest mean inbreeding
## # A tibble: 36 Ă— 3
## # Groups:   Population [8]
##    ID                          Inbreeding_Mean Population  
##    <chr>                                 <dbl> <fct>       
##  1 H.m.menyanthevora_NY_DN2469            0.75 H.maia      
##  2 H.m.menyanthevora_NY_DN2504            0.73 H.maia      
##  3 H.m.menyanthevora_NY_DR90              0.72 H.maia      
##  4 H.m.sandra_NJ_Hmg113                   0.69 H.maia      
##  5 H.m.menyanthevora_NY_DN2502            0.66 H.maia      
##  6 H.latifascia_IA_DN2480                 0.51 H.latifascia
##  7 H.latifascia_NE_Hmg041                 0.51 H.latifascia
##  8 H.latifascia_IA_DN2518                 0.5  H.latifascia
##  9 H.latifascia_NE_DN2523                 0.5  H.latifascia
## 10 H.latifascia_IA_DN2479                 0.5  H.latifascia
## # ℹ 26 more rows
Estimate population level inbreeding
inbreeding_individuals <- adegenet::inbreeding(pop_dataset, #compute individual inbreeding coefficients
                                               res.type = "sample", #obtain sample of F values
                                               N = 1000, #sample size
                                               M = 5000) #number of different F values
inbreeding_df <- data.frame(ID = names(inbreeding_individuals), Inbreeding = unlist(inbreeding_individuals),  Population = as.character(pop_dataset$pop)) #convert to dataframe

ggplot(inbreeding_df, aes(x = Inbreeding, color = Population)) + #plot density of inbreeding coefficients
  geom_density(linewidth = 1.4) +  #plot density with line width
  scale_color_viridis_d(option = "magma", direction = -1) +  #reverse magma palette
  labs(x = "Inbreeding Coefficient", y = "Density") + #add axis labels
  theme_classic() +
  theme(legend.position = "right")  #legend on the right

ggplot(inbreeding_df, aes(x = Inbreeding, fill = Population)) + #plot histogram of inbreeding coefficients
  geom_histogram(bins = 30, alpha = 0.6, position = "identity") + #histogram with transparency
  scale_fill_viridis_d(option = "magma", direction = -1) + #reverse magma palette
  labs(x = "Inbreeding Coefficient", y = "Count") + #axis labels
  theme_classic() +
  theme(legend.position = "right")

inbr_ind_point_est <- adegenet::inbreeding(pop_dataset, #compute individual inbreeding coefficients
                                           res.type = "estimate", #obtain point estimate of F values
                                           N = 100, #sample size
                                           M = 500) #number of different F values
inbreeding_merged <- merge(data.frame(ID = names(inbr_ind_point_est), Inbreeding = unlist(inbr_ind_point_est)),  data.frame(ID = indNames(pop_dataset), Population = as.character(pop_dataset$pop)),  by = "ID") #merge inbreeding coefficients with population info
population_inbreeding <- inbreeding_merged %>%group_by(Population) %>% #calculate population-level inbreeding statistics
  summarize(Average_Inbreeding = mean(Inbreeding, na.rm = T),  SD_Inbreeding = sd(Inbreeding, na.rm = T)) 
print(population_inbreeding %>% mutate(Average_Inbreeding = round(Average_Inbreeding, 2), SD_Inbreeding = round(SD_Inbreeding, 2))) #print population inbreeding statistics for populations (mean and SD)
## # A tibble: 8 Ă— 3
##   Population             Average_Inbreeding SD_Inbreeding
##   <chr>                               <dbl>         <dbl>
## 1 H.latifascia                         0.08          0.1 
## 2 H.lucina                             0.2           0.2 
## 3 H.maia                               0.33          0.24
## 4 H.maia.x.lucina                      0            NA   
## 5 H.nevadensis                         0.61          0.29
## 6 H.peigleri                           0.14          0.15
## 7 H.slosseri                           0.2           0.17
## 8 H.sp.GreatLakesComplex               0.16          0.15
ggplot(population_inbreeding, aes(x = reorder(Population, Average_Inbreeding), y = Average_Inbreeding)) + #bar plot of population inbreeding coefficients with SD error bars
  geom_bar(stat = "identity", aes(fill = Population)) + #bar plot with fill color
  geom_errorbar(aes(ymin = Average_Inbreeding - SD_Inbreeding, ymax = Average_Inbreeding + SD_Inbreeding),  width = 0.2) + #add error bars
  scale_fill_manual(values = viridis::magma(n = length(unique(population_inbreeding$Population)))) +
  labs(x = NULL, y = "Average inbreeding coefficient (with SD)") + #axis labels
  theme_classic() +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) #remove x-axis labels and ticks

Estimate linkage disequilibrium

We will use the corrected index of association rd (Agapow & Burt 2001) to estimate linkage between loci for each population. This is a less biased version of the original index of association by Brown et al. (1980) and accounts for the number of sampled loci. It uses a permutation test with the null hypothesis of unlinked loci.

results_list <- list() #initialize an empty list to store results
messages <- c() #initialize an empty list to store messages
get_num_samples <- function(pop) {length(indNames(popsub(pop_dataset, pop)))} #create function to get number of samples for each population
for (pop in unique(pop_dataset$pop)) { #loop through each population to compute index of association
  pop_data <- popsub(pop_dataset, pop) #subset data for current population
  ia_result <- ia(pop_data, sample = 100) #compute IA with 1000 permutations
  p_value <- ia_result["p.Ia"] #extract p-value
  ia_value <- ia_result["Ia"] #extract IA values
  if (!is.na(p_value) && !is.na(ia_value)) {results_list[[pop]] <- c(IA = ia_value, p_value = p_value) #store IA and p-value if available
  } else {results_list[[pop]] <- c(IA = NA, p_value = NA) #store NA if no p-value is available
  messages <- c(messages, paste("For population", pop, "no p-value could be calculated"))}} 

results_df <- do.call(rbind, lapply(names(results_list), function(pop) { #create data frame from results list
  data.frame(Population = pop, IA = results_list[[pop]][1], p_value = results_list[[pop]][2], n = get_num_samples(pop), stringsAsFactors = F)}))
for (message in messages) {print(message)} #print message(s)
## [1] "For population H.maia.x.lucina no p-value could be calculated"
print(results_df) #print test results
##                    Population       IA    p_value  n
## IA.Ia                  H.maia 3.981871 0.00990099 75
## IA.Ia1           H.latifascia 3.076350 0.06930693 15
## IA.Ia2               H.lucina 2.447624 0.49504950 10
## IA            H.maia.x.lucina       NA         NA  1
## IA.Ia3           H.nevadensis 2.306872 0.00990099 22
## IA.Ia4 H.sp.GreatLakesComplex 3.227128 0.05940594 10
## IA.Ia5             H.slosseri 2.900211 0.00990099 19
## IA.Ia6             H.peigleri 3.346096 0.00990099 12

4) Isolation-by-distance (IBD) using Mantel test

A) IBD among all individuals

Calculate Edwards’ genetic distance matrix between individuals (by transforming allele frequencies to square root values and compute Euclidean distance)
genetic_dist <- dist(sqrt(adegenet::tab(pop_dataset, freq = T)), method = "euclidean")
Calculate geographic distance matrix (using Vincenty formula from geosphere package to compute distance between coordinates)
geo_dist <- geosphere::distm(as.matrix(dataset[, c("Longitude", "Latitude")]), fun = geosphere::distVincentySphere)
Perform Mantel test to compare genetic and geographic distances to evaluate the correlation between the two distance matrices
mantel_test <- ade4::mantel.randtest(as.dist(genetic_dist), as.dist(geo_dist), nrepet = 1000)
mantel_test #print Mantel test results
## Monte-Carlo test
## Call: ade4::mantel.randtest(m1 = as.dist(genetic_dist), m2 = as.dist(geo_dist), 
##     nrepet = 1000)
## 
## Observation: 0.4374899 
## 
## Based on 1000 replicates
## Simulated p-value: 0.000999001 
## Alternative hypothesis: greater 
## 
##       Std.Obs   Expectation      Variance 
## 21.4443437587 -0.0002689246  0.0004167200
plot(mantel_test) #plot results: Black dot indicates observed correlation and histograms show permuted values

Plot IBD
combined_data <- data.frame(genetic_dist = reshape2::melt(as.matrix(genetic_dist))$value, #combine genetic and geographic distance matrices into one data frame
                            geo_dist = reshape2::melt(as.matrix(geo_dist))$value)
combined_data <- combined_data[combined_data$genetic_dist != 0 & combined_data$geo_dist != 0, ] #remove self-comparisons where distance is 0

svg("IBD across all individuals.svg", width = 25/2.56, height = 15/2.56)
ggplot(combined_data, aes(x = geo_dist / 1000, y = genetic_dist)) + #plot IBD
  geom_point(alpha = 0.2, color = "black", size = 1.5) + #scatter plot with points
  geom_smooth(method = "loess", color = "darkgrey", linewidth = 0.9) + #LOESS smooth fit
  geom_smooth(method = "lm", color = "orange", linewidth = 1.5) + #linear model fit
  labs(x = "Geographic Distance (km)", y = "Genetic Distance (Edward's)", title = "Isolation-by-distance") +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5), #center plot title
        axis.text = element_text(color = "black")) #color axis text

B) IBD among individuals within each population

Prepare data
population_subsets <- list() #define list to store subsets for each population
populations <- unique(pop_dataset@pop) #extract unique populations from genind object
for (pop in populations) { #create subsets for each population
  pop_genind_subset <- pop_dataset[pop_dataset@pop == pop, ] #subset genind for population
  pop_coords_subset <- dataset[dataset$Species == pop, ] #subset coordinates for population
  population_subsets[[pop]] <- list(genind_obj = pop_genind_subset, coords_df = pop_coords_subset)} #store subsets in list
Create function to compute distance matrices and run Mantel test for each population
analyze_population <- function(genind_obj, coords_df) {
  if (nrow(coords_df) <= 3) { #check if population has more than 3 individuals
    cat("Skipping population", unique(genind_obj@pop), "due to fewer than 3 individuals.\n")
    return(NULL)}  #skip populations with 3 or fewer individuals
  genetic_dist <- tryCatch({ #calculate genetic distance (Edwards' distance)
    dist(sqrt(adegenet::tab(genind_obj, freq = T)), method = "euclidean")
  }, error = function(e) {cat("Error in genetic distance calculation:", e$message, "\n")
    return(NULL)})
  geo_dist <- tryCatch({ #calculate geographic distance (Vincenty distance)
    geosphere::distm(as.matrix(coords_df[, c("Longitude", "Latitude")]), fun = geosphere::distVincentySphere)
  }, error = function(e) {cat("Error in geographic distance calculation:", e$message, "\n")
    return(NULL)})
  mantel_test <- NULL #perform Mantel test if both distance matrices are available
  if (!is.null(genetic_dist) && !is.null(geo_dist)) {
    mantel_test <- tryCatch({
      ade4::mantel.randtest(as.dist(genetic_dist), as.dist(geo_dist), nrepet = 1000)
    }, error = function(e) {cat("Error in Mantel test:", e$message, "\n")
      return(NULL)})
  } else {cat("Skipping Mantel test due to NULL distance matrices.\n")}
  plot_data <- NULL #prepare plot data and generate plot if distance matrices are available
  combined_data <- NULL #initialize combined_data to store genetic and geographic distances
  if (!is.null(genetic_dist) && !is.null(geo_dist)) {
    genetic_dist_matrix <- as.matrix(genetic_dist)
    geo_dist_matrix <- as.matrix(geo_dist)
    combined_data <- data.frame( #combine matrices for plotting
      genetic_dist = reshape2::melt(genetic_dist_matrix)$value,
      geo_dist = reshape2::melt(geo_dist_matrix)$value,
      Population = unique(genind_obj@pop)) #add population information
    combined_data <- combined_data[combined_data$genetic_dist != 0 & combined_data$geo_dist != 0, ] #remove self-comparisons
    mantel_p_value <- ifelse(is.null(mantel_test), NA, mantel_test$pvalue) #get Mantel p-value
    plot_title <- paste("Isolation-by-distance for", unique(genind_obj@pop), "\nMantel test p-value:", format(mantel_p_value, digits = 3)) #title with p-value
    plot_data <- ggplot(combined_data, aes(x = geo_dist / 1000, y = genetic_dist)) + #plot genetic vs geographic distance
      geom_point(alpha = 0.5, color = "black", size = 1.8) + #scatter plot with points
      geom_smooth(method = "loess", color = "darkgrey", linewidth = 0.9) + #LOESS smooth fit
      geom_smooth(method = "lm", color = "orange", linewidth = 1.5) +  #linear model fit
      labs(x = "Geographic distance (km)", y = "Genetic distance (Edwards')", title = plot_title) + #include plot title with p-value
      theme_classic() +
      theme(plot.title = element_text(hjust = 0.5), #center plot title
            axis.text = element_text(color = "black")) #color axis text
  } else {cat("Skipping plot due to missing distance matrices.\n")}
  return(list(mantel_test = mantel_test, plot_data = plot_data, combined_data = combined_data, num_individuals = nrow(coords_df)))} #return combined_data
results_list <- list() #initialize results list to store Mantel test results and plots
results_df <- data.frame(Population = character(), Num_Individuals = integer(), #initialize dataframe to store results
                         Mantel_p_value = numeric(), stringsAsFactors = F)
all_data <- data.frame() #initialize dataframe to store combined data for all populations
Loop through each population and perform analysis
for (pop in names(population_subsets)) {
  cat("Analyzing population:", pop, "\n")
  pop_data <- population_subsets[[pop]]$genind_obj #extract genind object for population
  pop_coords <- population_subsets[[pop]]$coords_df #extract coordinates dataframe for population
  results <- analyze_population(pop_data, pop_coords) #analyze population
  if (!is.null(results)) { #store results if available
    results_list[[pop]] <- results
    results_df <- rbind(results_df, data.frame( #store results in dataframe
      Population = pop, Num_Individuals = results$num_individuals,
      Mantel_p_value = ifelse(is.null(results$mantel_test), NA, results$mantel_test$obs), #check if mantel_test is NULL
      stringsAsFactors = F)) 
    if (!is.null(results$combined_data)) {
      all_data <- rbind(all_data, results$combined_data)} #append combined data for each population
    if (!is.null(results$plot_data)) { #save plot if available
      ggsave(paste0("Isolation_by_Distance_", pop, ".png"), plot = results$plot_data, 
             width = 8, height = 6, dpi = 300)}}}
## Analyzing population: H.maia
## Analyzing population: H.latifascia
## Analyzing population: H.lucina
## Analyzing population: H.maia.x.lucina 
## Skipping population 1 due to fewer than 3 individuals.
## Analyzing population: H.nevadensis
## Analyzing population: H.sp.GreatLakesComplex
## Analyzing population: H.slosseri
## Analyzing population: H.peigleri

Save result dataframe as csv file
write.csv(results_df, "Mantel_test_results.csv", row.names = F)
Plot IBD for combined data with trend lines for each population
svg("IBD across populations.svg", width = 25/2.56, height = 15/2.56)
ggplot(all_data, aes(x = geo_dist / 1000, y = genetic_dist, color = Population)) +
  geom_point(alpha = 0.4, size = 2) +
  geom_smooth(method = "loess", se = F, aes(color = Population), linewidth = 0.6) +
  geom_smooth(method = "lm", se = F, aes(color = Population), linewidth = 1.6) +
  scale_color_manual(values = viridis::magma(n = length(unique(all_data$Population)))) +
  labs(x = "Geographic distance (km)", y = "Genetic Distance (Edwards')", 
       title = "Isolation-by-distance across populations") +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5), #center plot title
        axis.text = element_text(color = "black")) #color axis text
dev.off()

5) PCA (Principal component analysis)

Genetic clusters are inferred through eigenvector decomposition of the allele frequencies (matrices) among individuals, aiming to reduce the complexity of genetic data by projecting it into a lower-dimensional space with each axis (principal component) capturing a portion of the variance in the allele frequencies (Patterson et al. 2006, Reich et al. 2008).

Define plotting parameters
cols_pop <- viridis::magma(n = length(levels(population_assignment)), begin = 0, end = 1) #define color palette
shapes_pop <- rep(c(21, 24, 22, 25), length.out = length(cols_pop)) #define shape palette
plot_width <- 25/2.56
plot_height <- 15/2.56
point_size <- 3.5
point_alpha <- 0.9
density_plot_alpha <- 0.7
density_plot_height <- 5/2.56
point_outline_color <- "black"
point_size_3D_plot <- 3
point_alpha_3D_plot <- 1
point_size_3D_plot <- 7
population_assignment_name <- "Species"
Define datasets to be used for PCA
pop_data_datasets <- list(pop_dataset = list(data = pop_dataset))
Create function to handle missing and infinite values
handle_missing_infinite_values <- function(data) {
  data$tab <- as.matrix(data$tab)
  data$tab <- data$tab[, colSums(is.na(data$tab) | is.infinite(data$tab)) < nrow(data$tab)]
  data$tab <- apply(data$tab, 2, function(column) {
    column_mean <- mean(column, na.rm = T)
    column[is.na(column) | is.infinite(column)] <- column_mean
    return(column)})
  return(data)}
Create function to perform PCA
perform_pca_and_check <- function(data) {
  data <- handle_missing_infinite_values(data)
  prcomp(data$tab)}
Create function to calculate proportion of variance explained by each PC
calculate_variance_explained <- function(pca_result) {
  (pca_result$sdev^2 / sum(pca_result$sdev^2)) * 100}
Create function to calculate number of PCs that explain at least 1% of variance
calculate_num_pcs_above_threshold <- function(var_expl_perc, threshold = 1) {
  length(which(var_expl_perc >= threshold))}
Create function to create data frames for scree plot
create_scree_plot_df <- function(var_expl_perc) {
  data.frame(PC = seq_along(var_expl_perc), VarianceExplained = var_expl_perc)}
Create function to create PCA data frames for plotting (retain 60% of total PCs)
create_pca_df <- function(pca_result, group) {
  num_pcs <- ncol(pca_result$x)  # total number of PCs
  num_pcs_to_keep <- ceiling(num_pcs * 0.5) #keep 50% of total PCs
  data.frame(pca_result$x[, 1:num_pcs_to_keep], group = group)}
Create function to plot scree plots with threshold line
plot_scree <- function(scree_plot_df, threshold) {
  num_pcs <- nrow(scree_plot_df)  #total number of PCs
  num_pcs_to_keep <- ceiling(num_pcs * 0.5) #keep 50% of total PCs
  ggplot(scree_plot_df[1:num_pcs_to_keep, ], aes(x = PC, y = VarianceExplained)) +
    geom_bar(stat = "identity", fill = "grey") +
    geom_line(aes(y = cumsum(VarianceExplained)), color = "orange") +
    geom_point(aes(y = cumsum(VarianceExplained)), color = "orange", size = 2) +
    labs(x = "PC", y = "Explained variance (%)") +
    geom_hline(yintercept = threshold, color = "black", linetype = "dashed") +
    theme_classic()}
Run PCA and plot scree plots
run_pca_for_datasets <- function(datasets) {
  lapply(datasets, function(subset) {
    pca_result <- perform_pca_and_check(subset$data)
    var_expl_perc <- calculate_variance_explained(pca_result)
    num_pcs <- calculate_num_pcs_above_threshold(var_expl_perc)
    scree_plot_df <- create_scree_plot_df(var_expl_perc)
    print(plot_scree(scree_plot_df, threshold = 1))
    list(pca_result = pca_result, variance_explained = var_expl_perc, num_pcs_above_1perc = num_pcs)})}
Run PCAs
pca_results <- run_pca_for_datasets(pop_data_datasets)

Plot PCA
pca_df_pop_dataset <- create_pca_df(pca_results$pop_dataset$pca_result, 
                                    factor(pop_dataset$pop, levels = unique(population_assignment)))
pca_df_pop_dataset_filtered_n2 <- pca_df_pop_dataset %>% dplyr::group_by(group) %>% dplyr::filter(n() > 1) #filter out groups for density plots with fewer than 2 data points

svg("PCA_PC1_PC2_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(pca_df_pop_dataset, aes(x = PC1, y = PC2)) + #PCA scatterplot of PC1 and PC2
  geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), 
             color = point_outline_color) +
  scale_shape_manual(values = shapes_pop) +
  scale_fill_manual(values = cols_pop) +
  labs(x = sprintf("PC1 (%.2f%%)", pca_results$pop_dataset$variance_explained[1]),
       y = sprintf("PC2 (%.2f%%)", pca_results$pop_dataset$variance_explained[2]),
       shape = population_assignment_name,
       fill = population_assignment_name) +
  theme_classic() +
  guides(override.aes = list(alpha = 1))
dev.off()

svg("PCA_PC1_PC3_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(pca_df_pop_dataset, aes(x = PC1, y = PC3)) + #PCA scatterplot of PC1 and PC3
  geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), 
             color = point_outline_color) +
  scale_shape_manual(values = shapes_pop) +
  scale_fill_manual(values = cols_pop) +
  labs(x = sprintf("PC1 (%.2f%%)", pca_results$pop_dataset$variance_explained[1]),
       y = sprintf("PC3 (%.2f%%)", pca_results$pop_dataset$variance_explained[2]),
       shape = population_assignment_name,
       fill = population_assignment_name) +
  theme_classic() +
  guides(override.aes = list(alpha = 1))
dev.off()

svg("PCA_density_PC1_pop_dataset.svg", width = plot_width, height = density_plot_height) #density plot for PC1
ggplot(pca_df_pop_dataset_filtered_n2, aes(x = PC1, fill = group, color = group)) +
  geom_density(alpha = density_plot_alpha) +
  scale_fill_manual(values = cols_pop) +
  scale_color_manual(values = cols_pop) +
  theme_void() +
  theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()
svg("PCA_density_PC2_pop_dataset.svg", width = plot_width, height = density_plot_height) #density plot for PC2
ggplot(pca_df_pop_dataset_filtered_n2, aes(x = PC2, fill = group, color = group)) +
  geom_density(alpha = density_plot_alpha) +
  scale_fill_manual(values = cols_pop) +
  scale_color_manual(values = cols_pop) +
  theme_void() +
  theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()
svg("PCA_density_PC3_pop_dataset.svg", width = plot_width, height = density_plot_height) #density plot for PC3
ggplot(pca_df_pop_dataset_filtered_n2, aes(x = PC3, fill = group, color = group)) +
  geom_density(alpha = density_plot_alpha) +
  scale_fill_manual(values = cols_pop) +
  scale_color_manual(values = cols_pop) +
  theme_void() +
  theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()

plotly::plot_ly( #3D plot of PCA using PC1-PC3
  data = pca_df_pop_dataset,
  x = ~PC1,
  y = ~PC2,
  z = ~PC3,
  color = ~group, colors = cols_pop,
  type = 'scatter3d', mode = 'markers',
  alpha = point_alpha_3D_plot, #marker transparency
  marker = list(size = point_size_3D_plot)) #marker size

6) DAPC - Discriminant analysis of principle components (Jombart et al. 2010)

Overview
Discriminant analysis of principal components (DAPC) is a multivariate method designed to identify and describe genetic clusters by determining the optimal number of clusters (k) that best summarizes the data. It is faster than traditional Bayesian clustering methods and does not assume Hardy-Weinberg Equilibrium (HWE) or linkage between loci. It maximizes variation between groups while minimizing variation within them, offering a distinct advantage over methods like PCA, PCoA, and MANOVA (which focus on global diversity but neglect group-specific differences). Furthermore, it is potentially more powerful and accurate in phylogenetic and hierarchical analyses (including hybridization) than methods that minimize HWE and linkage disequilibrium (e.g., STRUCTURE) (Kanno et al 2011, Dupuis and Sperling 2015, Jombart et al. 2010).
Approach
DAPC first transforms the data through principal component analysis (PCA) to ensure the variables submitted to discriminant analysis (DA) are uncorrelated and that their number is less than the number of analyzed individuals (in contrast to a standard DA), and then then applies DA to the retained principal components, aiming to discriminate between predefined groups. The eigenvalues in a DAPC represent the ratio of variance between groups over the variance within groups for each discriminant function, indicating the discriminant power of each axis in a DAPC plot.
How many PCs to retain?
Choosing the optimal number of retained principal components (PCs) is crucial for the effectiveness of DAPC. Retaining too many components can lead to overfitting and instability in membership probabilities, while retaining too few may result in a loss of valuable information. This can be done using the a-score (representing the difference between the proportion of successful reassignments observed in the analysis and values obtained using random groupings) or cross-validation. The latter optimization procedure is most suitable and finds the optimal number of retained PCs by splitting the data into two sets: a training (usually 90% of the data) and a validation set (typically the remaining 10%). The validation set is selected using stratified random sampling, ensuring that every group or population from the original dataset is represented in both sets. DAPC is then performed on the training set while retaining different numbers of PCs. The method then assesses how well the analysis can predict group membership for the individuals in the validation set. This helps to identify the optimal number of PCs to retain. The process is repeated multiple times to improve accuracy. The final number of PCs used in the analysis should be documented for transparency, along with evidence supporting the choice of k (Miller et al. 2020).
A priori vs de novo clustering
DAPC can be performed either using a priori defined groupings (e.g., populations/species) (similar to a PCA) or it can identify genetic clusters (k) de novo (e.g., similar to Structure). This decision can affect the DAPC results and is therefore important to consider (Miller et al. 2020). Misspecification of groupings can lead to artificially large populations with Wahlund-like effects of apparent depressed heterozygosity (Wahlund 1928), with these inflated population size estimates preventing conservation and increasing risk of extinction for one of “cryptic” genetic clusters, or it can lead to over splitting populations that should be combined. De novo group assignments can be inaccurate under conditions of high migration rates or low genetic differentiation but is mostly reliable when migration rates are low (Miller et al. 2020). For a priori assessments, the genetic distance between clusters reflects underlying FST values (Miller et al. 2020). Similar to other methods, artefactual discrete groups may be identified in populations with continuously distributed (cline-like) genetic diversity and under spatially heterogeneous sampling of populations. However, incorporating scatterplots for a graphical assessment of the genetic structures between clusters can allow to assess the organization of genetic variability and reveal potential clines.
k-means clustering
When grouping information is unknown, DAPC can incorporate k-means clustering to identify the optimal number of clusters k (Lee et al. 2009, Liu & Zhao 2006). The k-means algorithm maximizes variation between groups, and the optimal k is determined by comparing clustering solutions using the Bayesian Information Criterion (BIC), selecting the lowest BIC value.
Additional features
DAPC also provides membership probabilities for each individual, which are based on the retained discriminant functions. These probabilities differ from admixture coefficients in STRUCTURE but can similarly be interpreted as reflecting the proximity of individuals to different genetic clusters. They also offer insights into the distinctiveness of identified clusters and potential admixture events. This feature makes DAPC a potentially more powerful and accurate method for analyzing phylogenetic relationships, hierarchical structures, and hybridization events compared to methods like STRUCTURE, which minimize assumptions about HWE and linkage disequilibrium (Kanno et al. 2011, Dupuis and Sperling 2015, Jombart et al. 2010). Additionally, DAPC allows users to identify and plot alleles that contribute most significantly to the discrimination of clusters, providing further insight into genetic differentiation.

Define number of cross-validation runs

Use a larger number here (e.g., 10000)

crossval_N <- 10 #number of cross-validation runs

A) DAPC with prior grouping

Replace NA values and format genotype matrix
pop_dataset@tab <- matrix(
  as.integer(round(apply(pop_dataset@tab, 2, function(x) {
    x[is.na(x)] <- mean(x, na.rm = T) #replace NA with column means
    return(x)}))), #return modified column
  nrow = nrow(pop_dataset@tab), #number of rows
  ncol = ncol(pop_dataset@tab), #number of columns
  dimnames = dimnames(pop_dataset@tab)) #preserve original dimensions
Remove groups with fewer than 2 members and adjust color and shapes scales accordingly
original_populations <- unique(pop_dataset@pop) #identify populations before filtering
pop_dataset_DAPC <- pop_dataset[adegenet::pop(pop_dataset) %in% names(table(adegenet::pop(pop_dataset))[table(adegenet::pop(pop_dataset)) > 1]), ] #filter populations with fewer than 2 members
remaining_populations <- unique(pop_dataset_DAPC@pop) #identify remaining populations
removed_populations <- setdiff(original_populations, remaining_populations) #identify removed populations
if (length(removed_populations) > 0) { #check if any populations were removed
  cols_pop_DAPC <- cols_pop[-(which(original_populations %in% removed_populations))] #remove colors for removed populations
  shapes_pop_DAPC <- shapes_pop[-which(original_populations %in% removed_populations)] #remove shapes for removed populations
} else { #no populations were removed so keep original color and shape assignments
  cols_pop_DAPC <- cols_pop
  shapes_pop_DAPC <- shapes_pop}
Cross-validation to determine optimal number of PCs
xval_results_pop_dataset_DAPC_run1 <- adegenet::xvalDapc( #run1 of cross-validation
  pop_dataset_DAPC@tab, #genetic data matrix (allele frequencies)
  adegenet::pop(pop_dataset_DAPC), #population assignment (grouping factor) of individuals
  training.set = 0.9, #use 90% of data for training during cross-validation
  n.pca.max = ceiling(min(nrow(pop_dataset_DAPC@tab), ncol(pop_dataset_DAPC@tab)) * 0.6), #set maximum number of PCs to evaluate in cross-validation to 60% of total numbers of PCs
  result = "groupMean", #use group means to calculate the cross-validation result
  center = T, #center data before performing PCA
  scale = F, #do not scale data (i.e., do not standardize allele frequencies)
  n.rep = crossval_N, #set number of cross-validation replicates
  xval.plot = T)  #plot cross-validation results (mean squared error (MSE) vs. number of PCs)

optimal_pcs_pop_dataset_DAPC_run1 <- xval_results_pop_dataset_DAPC_run1[["Number of PCs Achieving Lowest MSE"]] #extract optimal number of PCs corresponding to lowest MSE from cross-validation results
print(optimal_pcs_pop_dataset_DAPC_run1) #print optimal number of PCs for dataset
## [1] "40"
xval_results_pop_dataset_DAPC_run2 <- adegenet::xvalDapc( #run2 of cross-validation
  pop_dataset_DAPC@tab, adegenet::pop(pop_dataset_DAPC),
  training.set = 0.9, result = "groupMean", center = T, scale = F, n.rep = crossval_N, xval.plot = T,
  n.pca = seq(as.numeric(optimal_pcs_pop_dataset_DAPC_run1) - 15, as.numeric(optimal_pcs_pop_dataset_DAPC_run1) + 15, by = 1)) #refine PCs range

optimal_pcs_pop_dataset_DAPC_run2 <- xval_results_pop_dataset_DAPC_run2[["Number of PCs Achieving Lowest MSE"]] #extract refined optimal PCs
print(xval_results_pop_dataset_DAPC_run2[2:6]) #print cross-validation results
## $`Median and Confidence Interval for Random Chance`
##      2.5%       50%     97.5% 
## 0.0977508 0.1433356 0.1981986 
## 
## $`Mean Successful Assignment by Number of PCs of PCA`
##        25        26        27        28        29        30        31        32 
## 0.8285714 0.8081633 0.7989796 0.7928571 0.7765306 0.8530612 0.7938776 0.8724490 
##        33        34        35        36        37        38        39        40 
## 0.8806122 0.8663265 0.8326531 0.8918367 0.8387755 0.8448980 0.8173469 0.8928571 
##        41        42        43        44        45        46        47        48 
## 0.8459184 0.8377551 0.8959184 0.8642857 0.8632653 0.8520408 0.8336735 0.8806122 
##        49        50        51        52        53        54        55 
## 0.8418367 0.8479592 0.8602041 0.9367347 0.8346939 0.8469388 0.8336735 
## 
## $`Number of PCs Achieving Highest Mean Success`
## [1] "52"
## 
## $`Root Mean Squared Error by Number of PCs of PCA`
##        25        26        27        28        29        30        31        32 
## 0.1999583 0.2191860 0.2244666 0.2364839 0.2317697 0.2013594 0.2514846 0.1660164 
##        33        34        35        36        37        38        39        40 
## 0.1417229 0.1592948 0.2025966 0.1372060 0.1922596 0.1864306 0.2146498 0.1406165 
##        41        42        43        44        45        46        47        48 
## 0.1920700 0.2010230 0.1267937 0.1567913 0.1682281 0.2015920 0.2180190 0.1462783 
##        49        50        51        52        53        54        55 
## 0.1721135 0.1846065 0.1791100 0.1069240 0.2158833 0.2068184 0.2150860 
## 
## $`Number of PCs Achieving Lowest MSE`
## [1] "52"
Perform DAPC
dapc_result_pop_dataset <- adegenet::dapc(
  pop_dataset_DAPC, pop = adegenet::pop(pop_dataset_DAPC),
  n.pca = as.numeric(optimal_pcs_pop_dataset_DAPC_run2),
  n.da = length(levels(pop_dataset_DAPC$pop)) - 1) #perform DAPC with optimal PCs (determined by crossvalidation)
Save (and load) results of crossvalidation and DAPC
save(xval_results_pop_dataset_DAPC_run1, optimal_pcs_pop_dataset_DAPC_run1, 
     xval_results_pop_dataset_DAPC_run2, optimal_pcs_pop_dataset_DAPC_run2,
     dapc_result_pop_dataset,
     file = "DAPC_results_pop_dataset.Rdata")
load("DAPC_results_pop_dataset.Rdata")
Evaluate DAPC results
print(round(dapc_result_pop_dataset$var, 2) * 100) #conserved variance in percent
## [1] 87
barplot(dapc_result_pop_dataset$eig, names.arg = paste0("LD", seq_along(dapc_result_pop_dataset$eig)), #plot eigenvalues of discriminant functions 
        xlab = "Linear discriminants", ylab = "Eigenvalues") #

round(dapc_result_pop_dataset$eig, 1) #show eigenvalues of discriminant functions
## [1] 953.8 261.1 116.6  83.0  63.9  25.2
dapc_relative_LDs <- round(dapc_result_pop_dataset$eig / sum(dapc_result_pop_dataset$eig) * 100, 1)
barplot(dapc_relative_LDs, #plot relative proportions of eigenvalues of discriminant functions
        names.arg = paste0("LD", seq_along(dapc_result_pop_dataset$eig)), 
        xlab = "Linear discriminants", ylab = "Relative proportion of eigenvalues [%]")

Plot DAPC results
LD1_label <- paste("LD1 (", dapc_relative_LDs[1], "%)", sep = "") #create axis labels for LD1 showing relative proportions of eigenvalues of discriminant functions in percent
LD2_label <- paste("LD2 (", dapc_relative_LDs[2], "%)", sep = "") #create axis labels for LD2 showing relative proportions of eigenvalues of discriminant functions in percent
LD3_label <- paste("LD3 (", dapc_relative_LDs[3], "%)", sep = "") #create axis labels for LD3 showing relative proportions of eigenvalues of discriminant functions in percent
dapc_df_pop_dataset <- data.frame(dapc_result_pop_dataset$ind.coord, group = factor(dapc_result_pop_dataset$grp, levels = levels(population_assignment))) #create dataframe for visualization

svg("DAPC_LD1_LD2_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_df_pop_dataset, aes(x = LD1, y = LD2)) + #DAPC scatterplot of LD1 vs LD2
  geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
  scale_shape_manual(values = shapes_pop_DAPC) +
  scale_fill_manual(values = cols_pop_DAPC) +
  labs(shape = population_assignment_name, fill = population_assignment_name, x = LD1_label, y = LD2_label) +
  theme_classic() +
  guides(override.aes = list(alpha = 1))
dev.off()

svg("DAPC_LD1_LD3_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_df_pop_dataset, aes(x = LD1, y = LD3)) + #DAPC scatterplot of LD1 vs LD3
  geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
  scale_shape_manual(values = shapes_pop_DAPC) +
  scale_fill_manual(values = cols_pop_DAPC) +
  labs(shape = population_assignment_name, fill = population_assignment_name, x = LD1_label, y = LD3_label) +
  theme_classic() +
  guides(override.aes = list(alpha = 1))
dev.off()

svg("DAPC_densi_LD1_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_df_pop_dataset, aes(x = LD1, fill = group)) + #density plot for LD1
  geom_density(alpha = density_plot_alpha) +
  scale_fill_manual(values = cols_pop) +
  theme_void() +
  theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()

svg("DAPC_densi_LD2_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_df_pop_dataset, aes(x = LD2, fill = group)) + #density plot for LD2
  geom_density(alpha = density_plot_alpha) +
  scale_fill_manual(values = cols_pop) +
  theme_void() +
  theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()

svg("DAPC_densi_LD3_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_df_pop_dataset, aes(x = LD3, fill = group)) + #density plot for LD3
  geom_density(alpha = density_plot_alpha) +
  scale_fill_manual(values = cols_pop) +
  theme_void() +
  theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()

plotly::plot_ly( #3D plot of DAPC using LD1-LD3
  data = dapc_df_pop_dataset,
  x = ~LD1, y = ~LD2, z = ~LD3,
  color = ~group, colors = cols_pop_DAPC,
  type = 'scatter3d', mode = 'markers',
  alpha = point_alpha_3D_plot, #marker transparency
  marker = list(size = point_size_3D_plot)) #marker size
Plot DAPC with scaled LD axes

Plot the DAPC with scaled LD axes according to proportion of variance explained by each respective eigenvalue to ensure that distances between points in 3D plot reflect each discriminant function’s contribution

eig_proportions_dapc <- dapc_result_pop_dataset$eig / sum(dapc_result_pop_dataset$eig) #calculate relative proportions of eigenvalues
dapc_df_pop_dataset_scaled <- dapc_df_pop_dataset
dapc_df_pop_dataset_scaled$LD1 <- dapc_df_pop_dataset_scaled$LD1 * sqrt(eig_proportions_dapc[1]) #scale LD1
dapc_df_pop_dataset_scaled$LD2 <- dapc_df_pop_dataset_scaled$LD2 * sqrt(eig_proportions_dapc[2]) #scale LD2
dapc_df_pop_dataset_scaled$LD3 <- dapc_df_pop_dataset_scaled$LD3 * sqrt(eig_proportions_dapc[3]) #scale LD3

ggplot(dapc_df_pop_dataset_scaled, aes(x = LD1, y = LD2)) + #DAPC scaled scatterplot of LD1 vs LD2
  geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
  scale_shape_manual(values = shapes_pop_DAPC) +
  scale_fill_manual(values = cols_pop_DAPC) +
  labs(shape = population_assignment_name, fill = population_assignment_name, x = paste("LD1 (scaled)"), y = paste("LD2 (scaled)")) +
  theme_classic() +
  xlim(range(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2)))) +  
  ylim(range(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2)))) +
  guides(override.aes = list(alpha = 1))

ggplot(dapc_df_pop_dataset_scaled, aes(x = LD1, y = LD3)) + #DAPC scaled scatterplot of LD1 vs LD3
  geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
  scale_shape_manual(values = shapes_pop_DAPC) +
  scale_fill_manual(values = cols_pop_DAPC) +
  labs(shape = population_assignment_name, fill = population_assignment_name, x = paste("LD1 (scaled)"), y = paste("LD3 (scaled)")) +
  theme_classic() +
  xlim(range(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD3)))) +  
  ylim(range(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD3)))) +
  guides(override.aes = list(alpha = 1))

plotly::plot_ly( #3D plot of DAPC_kmeans using scaled LD1-LD3
  data = dapc_df_pop_dataset_scaled,
  x = ~LD1, y = ~LD2, z = ~LD3,
  color = ~group, colors = cols_pop_DAPC,
  type = 'scatter3d', mode = 'markers',
  alpha = point_alpha_3D_plot, marker = list(size = point_size_3D_plot)
) %>% layout(scene = list(xaxis = list(title = "LD1", range = c(min(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3))), max(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3))))),
                          yaxis = list(title = "LD2", range = c(min(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3))), max(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3))))),
                          zaxis = list(title = "LD3", range = c(min(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3))), max(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3)))))))
Create Structure-like plot showing cluster posterior probabilities for individuals across populations
dapc_group_memberships <- as.data.frame(dapc_result_pop_dataset$posterior) #convert posterior probabilities to data frame
dapc_group_memberships$ID <- row.names(dapc_group_memberships) #add ID column
dapc_group_memberships$Population <- pop_dataset$pop[match(dapc_group_memberships$ID, indNames(pop_dataset))] #match populations to IDs
dapc_group_memberships_long <- melt(dapc_group_memberships, id.vars = c("ID", "Population"), variable.name = "Cluster", value.name = "Probability") #reshape to long format

svg("DAPC_membership_probabilities.svg", width = 30/2.56, height = 25/2.56)
ggplot(dapc_group_memberships_long, aes(x = Probability, y = ID, fill = Cluster)) + #create Structure-like plots 
  geom_bar(stat = "identity") + 
  theme_classic() +
  scale_fill_manual(values = cols_pop_DAPC) + #use viridis color palette
  facet_wrap(~ Population, scales = "free_y") + #split y-axis by population
  labs(fill = "Cluster") +
  theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 4), #modify size of y-axis labels
        axis.text.x = element_blank(), #remove x-axis labels
        axis.title.y = element_blank(), #remove y-axis title
        axis.title.x = element_blank(), #remove x-axis title
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        panel.background = element_blank(), 
        plot.background = element_blank())
dev.off()

Assess allele differentiation
dapc_loading_LD1 <- adegenet::loadingplot(dapc_result_pop_dataset$var.contr[, 1], lab.jitter = 100, #plot loading for first LD axis 
                                          thres = quantile(dapc_result_pop_dataset$var.contr[, 1], 0.99)) #set threshold to 99th percentile (top 1% of loadings)

dapc_loading_LD2 <- adegenet::loadingplot(dapc_result_pop_dataset$var.contr[, 2], lab.jitter = 100, #plot loading for second LD axis 
                                          thres = quantile(dapc_result_pop_dataset$var.contr[, 2], 0.99)) #set threshold to 99th percentile (top 1% of loadings)

dapc_loading_LD1$var.names #print most differentiated alleles for LD1
## [1] "ctg00000001_1_12521730.0" "ctg00000001_1_12521730.1"
## [3] "ctg00000003_1_4110250.0"  "ctg00000003_1_4110250.1" 
## [5] "ctg00000005_1_16585380.0" "ctg00000005_1_16585380.1"
dapc_loading_LD2$var.names #print most differentiated alleles for LD2
## [1] "ctg00000018_1_4211457.0"  "ctg00000018_1_4211457.1" 
## [3] "ctg00000018_1_10958111.0" "ctg00000018_1_10958111.1"
## [5] "ctg00000018_1_12472987.0" "ctg00000018_1_12472987.1"
intersect(dapc_loading_LD1$var.names, dapc_loading_LD2$var.names) #show differentiated alleles for both LD1 and LD2
## character(0)
Calculate allele frequencies per population and show allele frequencies of differentiated SNPs
pop_dataset_DAPC_loci <- adegenet::seploc(pop_dataset_DAPC) #separate loci
snp_names <- names(pop_dataset_DAPC_loci) #extract loci names
snp_data <- lapply(snp_names, function(snp) { #separate loci and process SNPs
  if (snp %in% names(pop_dataset_DAPC_loci)) {
    adegenet::tab(pop_dataset_DAPC_loci[[snp]]) #process SNP if exists
  } else {NULL}}) #return NULL if SNP does not exist
snp_frequencies <- lapply(snp_data, function(data) { #calculate allele frequencies per population
  if (!is.null(data)) {
    apply(data, 2, function(e) tapply(e, adegenet::pop(pop_dataset_DAPC), mean, na.rm = T)) #calculate frequencies
  } else {NULL}}) #return NULL for non-existent data
for (i in seq_along(snp_frequencies)) { #print allele frequencies of differentiated SNPs
  if (!is.null(snp_frequencies[[i]])) {
    cat("Allele frequencies for SNP:", snp_names[i], "\n")
    print(snp_frequencies[[i]])
  } else {cat("No data for SNP:", snp_names[i], "\n")}}
## Allele frequencies for SNP: Chromosome24_358443 
##                        Chromosome24_358443.1 Chromosome24_358443.0
## H.maia                            0.62666667             1.3733333
## H.latifascia                      0.40000000             1.6000000
## H.lucina                          0.40000000             1.6000000
## H.nevadensis                      0.09090909             1.9090909
## H.sp.GreatLakesComplex            0.60000000             1.4000000
## H.slosseri                        1.10526316             0.8947368
## H.peigleri                        0.41666667             1.5833333
## Allele frequencies for SNP: Chromosome24_358452 
##                        Chromosome24_358452.0 Chromosome24_358452.1
## H.maia                              1.960000            0.04000000
## H.latifascia                        1.933333            0.06666667
## H.lucina                            2.000000            0.00000000
## H.nevadensis                        2.000000            0.00000000
## H.sp.GreatLakesComplex              1.900000            0.10000000
## H.slosseri                          2.000000            0.00000000
## H.peigleri                          2.000000            0.00000000
## Allele frequencies for SNP: Chromosome24_358458 
##                        Chromosome24_358458.0 Chromosome24_358458.1
## H.maia                             1.6533333             0.3466667
## H.latifascia                       1.2666667             0.7333333
## H.lucina                           1.1000000             0.9000000
## H.nevadensis                       0.5454545             1.4545455
## H.sp.GreatLakesComplex             1.4000000             0.6000000
## H.slosseri                         1.4736842             0.5263158
## H.peigleri                         1.6666667             0.3333333
## Allele frequencies for SNP: Chromosome24_358474 
##                        Chromosome24_358474.0 Chromosome24_358474.1
## H.maia                                   2.0                   0.0
## H.latifascia                             2.0                   0.0
## H.lucina                                 2.0                   0.0
## H.nevadensis                             2.0                   0.0
## H.sp.GreatLakesComplex                   1.7                   0.3
## H.slosseri                               2.0                   0.0
## H.peigleri                               2.0                   0.0
## Allele frequencies for SNP: Chromosome24_358511 
##                        Chromosome24_358511.0 Chromosome24_358511.1
## H.maia                                   2.0                   0.0
## H.latifascia                             1.8                   0.2
## H.lucina                                 2.0                   0.0
## H.nevadensis                             2.0                   0.0
## H.sp.GreatLakesComplex                   2.0                   0.0
## H.slosseri                               2.0                   0.0
## H.peigleri                               2.0                   0.0
Assess and visualize group memberships
dapc_group_memberships <- as.data.frame(dapc_result_pop_dataset$posterior) #convert posterior probabilities to data frame
subset_size <- 40 #define subset size
plot_assignments <- function(dapc_result, subset_size, max_rows) { #plot group memberships (heat colors represent membership probabilities with red=1 and white=0, blue crosses represent prior Cluster_assignment provided to DAPC)
  num_plots <- ceiling(max_rows / subset_size) #calculate number of plots needed
  for (i in 0:(num_plots - 1)) {
    start_row <- i * subset_size + 1 #start index for subset
    end_row <- min((i + 1) * subset_size, max_rows) #end index for subset
    cat("Plotting rows", start_row, "to", end_row, "\n") #print status
    adegenet::assignplot(dapc_result, subset = start_row:end_row)}} #plot assignments for current subset
plot_assignments(dapc_result_pop_dataset, subset_size, nrow(dapc_group_memberships)) #plot all subsets
## Plotting rows 1 to 40

## Plotting rows 41 to 80

## Plotting rows 81 to 120

## Plotting rows 121 to 160

## Plotting rows 161 to 163

Identify and plot admixed individuals
dapc_result_pop_dataset$grp <- factor(dapc_result_pop_dataset$grp, levels = unique(pop_dataset_DAPC$pop))
admixed_individuals <- which(apply(dapc_result_pop_dataset$posterior, 1, function(e) all(e < 0.80))) #identify admixed individuals (posterior probabilities less than 0.80)
admixed_individuals #show admixed individuals
##           H.slosseri_TX_Hmg028 H.sp.GreatLakesComplex_MI_DR89 
##                             52                            149
if (length(admixed_individuals) > 0) {
  adegenet::compoplot(dapc_result_pop_dataset, col.pal = cols_pop_DAPC, subset = admixed_individuals)}

Assign individuals to clusters based on threshold
threshold_high <- 0.75 #define higher threshold (probabilities above this value will assign individual to single Cluster_assignment)
threshold_low <- 1 - threshold_high #define lower threshold (individuals with probabilities above this value but below higher threshold will be assigned to multiple clusters)
admixed_individuals_cluster <- "High admixture" #set name of admixed individuals (remaining individuals belonging to multiple clusters)
dapc_result_pop_dataset_posterior <- as.data.frame(dapc_result_pop_dataset$posterior)
dapc_result_pop_dataset_posterior[sapply(dapc_result_pop_dataset_posterior, is.numeric)] <- round(dapc_result_pop_dataset_posterior[sapply(dapc_result_pop_dataset_posterior, is.numeric)], 2) #round numeric columns
dapc_result_pop_dataset_posterior$ID <- rownames(dapc_result_pop_dataset_posterior) #add ID column
assign_clusters <- function(probs) {
  if (any(probs > threshold_high)) {
    assigned_cluster <- names(probs)[which.max(probs)]
  } else if (any(probs > threshold_low)) {
    shared_clusters <- names(probs)[probs > threshold_low]
    if (length(shared_clusters) > 3) {
      assigned_cluster <- admixed_individuals_cluster
    } else {assigned_cluster <- paste(shared_clusters, collapse = "+")}
  } else {assigned_cluster <- admixed_individuals_cluster}
  return(assigned_cluster)}
dapc_result_pop_dataset_posterior$Assigned_cluster <- apply(dapc_result_pop_dataset_posterior[1:length(unique(dapc_df_pop_dataset$group))], 1, assign_clusters) #apply function to each individual
dapc_final_dataset <- left_join(dataset, dapc_result_pop_dataset_posterior, by = "ID") #combine datasets
Set DAPC map settings
DAPC_map_point_jitter <- 0 #set degree of jitter (to avoid overlapping points)
DAPC_map_point_alpha <- 0.8 #set transparency of points
DAPC_map_point_outline_color <- "black" #modify color of point outlines
DAPC_map_point_size <- 4 #set size of points
DAPC_map_color_palette <- viridis::mako #set colorblind-friendly viridis color palette
DAPC_kmeans_map_color_palette <- viridis::viridis #set colorblind-friendly viridis color palette
DAPC_map_buffer_percentage <- 0.2 #modify buffer around range of coordinates for map
DAPC_map_state_border_color <- "gray30" #modify state border color for map
DAPC_map_state_border_thickness <- 0.3  #modify state border thickness for map
DAPC_map_country_border_color <- "gray30" #modify country border color for map
DAPC_map_country_border_thickness <- 0.8 #modify country border thickness for map
DAPC_map_width <- 25/2.54 #modify width of plot for map
DAPC_map_height <- 15/2.54 #modify height of plot for map
DAPC_map_filling_color <- "lightgrey" #modify map filling color
Prepare data for DAPC map
dapc_dataset_map <- dapc_final_dataset %>% tidyr::drop_na() #remove empty rows for plotting
dapc_dataset_map$Longitude <- as.numeric(dapc_dataset_map$Longitude) #ensure longitude is numeric
dapc_dataset_map$Latitude <- as.numeric(dapc_dataset_map$Latitude) #ensure latitude is numeric
xlim_range <- range(dapc_dataset_map$Longitude, na.rm = T) #calculate longitude range
ylim_range <- range(dapc_dataset_map$Latitude, na.rm = T) #calculate latitude range
x_buffer <- diff(xlim_range) * DAPC_map_buffer_percentage #calculate longitude buffer
y_buffer <- diff(ylim_range) * DAPC_map_buffer_percentage #calculate latitude buffer
xlim_range <- c(xlim_range[1] - x_buffer, xlim_range[2] + x_buffer) #apply longitude buffer
ylim_range <- c(ylim_range[1] - y_buffer, ylim_range[2] + y_buffer) #apply latitude buffer
Create map with DAPC clusters
dapc_dataset_map$Assigned_cluster <- factor(dapc_dataset_map$Assigned_cluster, levels = c(unique(dapc_dataset_map$Assigned_cluster) %>% .[!stringr::str_detect(., "\\+")] %>% sort(), unique(dapc_dataset_map$Assigned_cluster) %>% .[stringr::str_detect(., "\\+")] %>% sort(), admixed_individuals_cluster)) #reorder factor levels in dataset so that single clusters are first, followed by multiple clusters and admixed_individuals_cluster
svg("DAPC_cluster_map.svg", width = DAPC_map_width, height = DAPC_map_height)
map_DAPC_cluster <- ggplot(data = dapc_dataset_map, aes(x = Longitude, y = Latitude)) +
  theme_void() + #use theme_void for no background
  borders("world", colour = DAPC_map_country_border_color, fill = DAPC_map_filling_color, 
          linewidth = DAPC_map_country_border_thickness) +
  geom_polygon(data = map_data("state"), aes(x = long, y = lat, group = group), #add state borders (only for USA!)
               fill = NA, colour = DAPC_map_state_border_color, 
               linewidth = DAPC_map_state_border_thickness) +
  geom_point(aes(fill = Assigned_cluster, shape = Assigned_cluster), 
             alpha = DAPC_map_point_alpha, color = DAPC_map_point_outline_color, 
             size = DAPC_map_point_size) +
  geom_jitter(aes(fill = Assigned_cluster, shape = Assigned_cluster), 
              width = DAPC_map_point_jitter, height = DAPC_map_point_jitter, 
              alpha = DAPC_map_point_alpha, color = DAPC_map_point_outline_color, 
              size = DAPC_map_point_size) +
  coord_fixed(xlim = xlim_range, ylim = ylim_range) + #set coordinates with buffer
  scale_fill_manual(values = DAPC_map_color_palette(n = length(unique(dapc_dataset_map$Assigned_cluster)), begin = 0, end = 1)) + #assign colors 
  scale_shape_manual(values = rep(c(21, 22, 24, 25), length.out = length(unique(dapc_dataset_map$Assigned_cluster)))) + #assign shapes
  guides(fill = guide_legend(override.aes = list(size = DAPC_map_point_size, alpha = 1)), 
         shape = guide_legend(override.aes = list(size = DAPC_map_point_size, alpha = 1))) + 
  labs(fill = "Cluster", shape = "Cluster") #add labels
suppressWarnings(print(map_DAPC_cluster)) #print map and suppress warnings
dev.off() #close svg device

Save assignment probabilities of DAPC as text file
names(dapc_df_pop_dataset)[names(dapc_df_pop_dataset) == "group"] <- "Cluster"
dapc_df_pop_dataset$ID <- row.names(dapc_df_pop_dataset)
dapc_final_dataset <- left_join(dapc_df_pop_dataset, dapc_final_dataset, by = "ID")
dapc_final_dataset$Assigned_cluster <- factor(dapc_final_dataset$Assigned_cluster, levels = c(unique(dapc_final_dataset$Assigned_cluster) %>% .[!stringr::str_detect(., "\\+")] %>% sort(), unique(dapc_final_dataset$Assigned_cluster) %>% .[stringr::str_detect(., "\\+")] %>% sort(), admixed_individuals_cluster)) #reorder factor levels in dataset so that single clusters are first, followed by multiple clusters and admixed_individuals_cluster
write.table(dapc_final_dataset[order(dapc_final_dataset$ID), ], #sort by ID
            file = "DAPC_final_dataset_assignment_probabilities_clusters", 
            sep = "\t", dec = ",", row.names = F, col.names = T, quote = F)
Plot DAPC results with assigned clusters (including mixed clusters)
svg("DAPC_LD1_LD2_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_final_dataset, aes(x = LD1, y = LD2)) + #DAPC scatterplot of LD1 vs LD2
  geom_point(size = point_size, alpha = point_alpha, 
             aes(shape = Assigned_cluster, fill = Assigned_cluster), color = point_outline_color) +
  scale_shape_manual(values =  rep(c(21, 22, 24, 25), length.out = length(unique(dapc_final_dataset$Assigned_cluster)))) +
  scale_fill_manual(values = DAPC_map_color_palette(n = length(unique(dapc_final_dataset$Assigned_cluster)), begin = 0, end = 1)) +
  labs(shape = population_assignment_name, fill = population_assignment_name) +
  theme_classic()
dev.off()

svg("DAPC_LD1_LD3_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_final_dataset, aes(x = LD1, y = LD3)) + #DAPC scatterplot
  geom_point(size = point_size, alpha = point_alpha, 
             aes(shape = Assigned_cluster, fill = Assigned_cluster), color = point_outline_color) +
  scale_shape_manual(values =  rep(c(21, 22, 24, 25), length.out = length(unique(dapc_final_dataset$Assigned_cluster)))) +
  scale_fill_manual(values = DAPC_map_color_palette(n = length(unique(dapc_final_dataset$Assigned_cluster)), begin = 0, end = 1)) +
  labs(shape = population_assignment_name, fill = population_assignment_name) +
  theme_classic()
dev.off()

plotly::plot_ly( #3D plot of DAPC using LD1-LD3
  data = dapc_final_dataset,
  x = ~LD1,
  y = ~LD2,
  z = ~LD3,
  color = ~Assigned_cluster, 
  colors = (DAPC_map_color_palette(n = length(unique(dapc_final_dataset$Assigned_cluster)), begin = 0, end = 1)),
  type = 'scatter3d', mode = 'markers',
  alpha = point_alpha_3D_plot, #marker transparency
  marker = list(size = point_size_3D_plot)) #marker size

B) DAPC with grouping identified via k-means

Identify prior grouping via k-means clustering and BIC
clusters_kmeans_pop_dataset_DAPC <- find.clusters(pop_dataset_DAPC, #apply k-means clustering to identify grouping
                                                            n.iter = 100000, #number of iterations per run
                                                            n.start = 100, #number of randomly chosen starting centroids
                                                            max.n.clusters = 50, #evaluate up to k clusters
                                                            n.pca = length(indNames(pop_dataset_DAPC))) #retain all PCs
round(sort(clusters_kmeans_pop_dataset_DAPC$Kstat), 2) #BIC for each k sorted
clusters_assignment_pop_dataset_DAPC <- as.data.frame(clusters_kmeans_pop_dataset_DAPC$grp) #assignment of each individual to the chosen number of clusters
colnames(clusters_assignment_pop_dataset_DAPC)[1] <- "Cluster_ID" #rename first column
clusters_kmeans_pop_dataset_DAPC$size #number of individuals assigned to each Cluster_assignment
## [1] 13 23 30  9 11 13 64
Replace NA values and format genotype matrix
pop_dataset_DAPC@tab <- matrix(
  as.integer(round(apply(pop_dataset_DAPC@tab, 2, function(x) {
    x[is.na(x)] <- mean(x, na.rm = T) #replace NA with column means
    return(x)}))), #return modified column
  nrow = nrow(pop_dataset_DAPC@tab), #number of rows
  ncol = ncol(pop_dataset_DAPC@tab), #number of columns
  dimnames = dimnames(pop_dataset_DAPC@tab)) #preserve original dimensions
Cross-validation to determine the optimal number of PCs using two runs
xval_results_pop_dataset_DAPC_kmeans_run1 <- adegenet::xvalDapc( #run1
  pop_dataset_DAPC@tab, #genetic data matrix (allele frequencies) 
  clusters_assignment_pop_dataset_DAPC$Cluster_ID, #population assignment (grouping factor) of individuals using Cluster_assignment IDs
  training.set = 0.9, #use 90% of data for training during cross-validation
  n.pca.max = ceiling(min(nrow(pop_dataset_DAPC@tab), ncol(pop_dataset_DAPC@tab)) * 0.6), #set maximum number of PCs to evaluate in cross-validation to 60% of total numbers of PCs
  result = "groupMean", #use group means to calculate the cross-validation result
  center = T, #center data before performing PCA
  scale = F, #do not scale data (i.e., do not standardize allele frequencies)
  n.rep = crossval_N, #set number of cross-validation replicates
  xval.plot = T)  #plot cross-validation results (mean squared error (MSE) vs. number of PCs)

optimal_pcs_pop_dataset_DAPC_kmeans_run1 <- xval_results_pop_dataset_DAPC_kmeans_run1[["Number of PCs Achieving Lowest MSE"]] #extract optimal PCs
print(optimal_pcs_pop_dataset_DAPC_kmeans_run1) #print optimal PCs
## [1] "20"
xval_results_pop_dataset_DAPC_kmeans_run2 <- adegenet::xvalDapc( #run2
  pop_dataset_DAPC@tab, clusters_assignment_pop_dataset_DAPC$Cluster_ID, training.set = 0.9, 
  result = "groupMean", center = T, scale = F, n.rep = crossval_N, xval.plot = T,
  n.pca = seq(as.numeric(optimal_pcs_pop_dataset_DAPC_kmeans_run1) - 15, 
              as.numeric(optimal_pcs_pop_dataset_DAPC_kmeans_run1) + 15, by = 1)) #refine PCs range

optimal_pcs_pop_dataset_DAPC_kmeans_run2 <- xval_results_pop_dataset_DAPC_kmeans_run2[["Number of PCs Achieving Lowest MSE"]] #extract refined optimal PCs
print(xval_results_pop_dataset_DAPC_kmeans_run2[2:6]) #print cross-validation results
## $`Median and Confidence Interval for Random Chance`
##      2.5%       50%     97.5% 
## 0.0922420 0.1393075 0.1962677 
## 
## $`Mean Successful Assignment by Number of PCs of PCA`
##         5         6         7         8         9        10        11        12 
## 0.9265306 0.9557823 0.9571429 0.9469388 0.9748299 0.9680272 0.9646259 0.9551020 
##        13        14        15        16        17        18        19        20 
## 0.9748299 0.9721088 0.9809524 0.9836735 0.9789116 0.9836735 0.9741497 0.9646259 
##        21        22        23        24        25        26        27        28 
## 0.9857143 0.9857143 0.9836735 0.9836735 0.9836735 0.9904762 0.9714286 0.9646259 
##        29        30        31        32        33        34        35 
## 0.9551020 0.9482993 0.9789116 0.9455782 0.9761905 0.9666667 0.9673469 
## 
## $`Number of PCs Achieving Highest Mean Success`
## [1] "26"
## 
## $`Root Mean Squared Error by Number of PCs of PCA`
##          5          6          7          8          9         10         11 
## 0.07780124 0.05334826 0.05429407 0.06877144 0.04098604 0.05095234 0.04563404 
##         12         13         14         15         16         17         18 
## 0.06242215 0.03501915 0.04537981 0.03688556 0.03428463 0.03744587 0.03027020 
##         19         20         21         22         23         24         25 
## 0.03428463 0.04771613 0.02608203 0.02608203 0.02686860 0.02686860 0.02686860 
##         26         27         28         29         30         31         32 
## 0.02129589 0.05634362 0.05467628 0.07250565 0.07076138 0.03080063 0.06595483 
##         33         34         35 
## 0.03367175 0.04517540 0.04933732 
## 
## $`Number of PCs Achieving Lowest MSE`
## [1] "26"
Perform DAPC using k-clusters as prior grouping
dapc_result_pop_dataset_kmeans <- dapc(pop_dataset_DAPC, pop = clusters_assignment_pop_dataset_DAPC$Cluster_ID,
                                       n.pca = as.numeric(optimal_pcs_pop_dataset_DAPC_kmeans_run2), #optimal number of PCs from cross-validation
                                       n.da = length(unique(clusters_assignment_pop_dataset_DAPC$Cluster_ID)) - 1) #number of discriminant functions (clusters - 1)
Save results
save(xval_results_pop_dataset_DAPC_kmeans_run1, optimal_pcs_pop_dataset_DAPC_kmeans_run1, 
     xval_results_pop_dataset_DAPC_kmeans_run2, optimal_pcs_pop_dataset_DAPC_kmeans_run2,
     dapc_result_pop_dataset_kmeans,
     file = "DAPC_results_pop_dataset_kmeans.Rdata")
load("DAPC_results_pop_dataset_kmeans.Rdata")
Evaluate DAPC results
print(round(dapc_result_pop_dataset_kmeans$var, 2) * 100) #conserved variance in percent
## [1] 70
barplot(dapc_result_pop_dataset_kmeans$eig, names.arg = paste0("LD", seq_along(dapc_result_pop_dataset_kmeans$eig)), #plot eigenvalues of discriminant functions 
        xlab = "Linear discriminants", ylab = "Eigenvalues")

round(dapc_result_pop_dataset_kmeans$eig, 1) #show eigenvalues of discriminant functions
## [1] 796.8 523.4 225.1  87.3  77.2  52.5
dapc_kmeans_relative_LDs <- round(dapc_result_pop_dataset_kmeans$eig / sum(dapc_result_pop_dataset_kmeans$eig) * 100, 1)
barplot(dapc_kmeans_relative_LDs, #plot relative proportions of eigenvalues of discriminant functions
        names.arg = paste0("LD", seq_along(dapc_result_pop_dataset_kmeans$eig)), 
        xlab = "Linear discriminants", ylab = "Relative proportion of eigenvalues [%]")

Visualize main DAPC results
cols_pop_DAPC_kmeans <- viridis::viridis(n = length(unique(clusters_assignment_pop_dataset_DAPC$Cluster_ID)), begin = 0, end = 1) #define color palette
shapes_pop_DAPC_kmeans <- rep(c(21, 24, 22, 25), length.out = length(unique(clusters_assignment_pop_dataset_DAPC$Cluster_ID))) #define shape palette
dapc_kmeans_df_pop_dataset <- data.frame(dapc_result_pop_dataset_kmeans$ind.coord, group = dapc_result_pop_dataset_kmeans$grp) #create dataframe for visualization
LD1_label <- paste("LD1 (", dapc_kmeans_relative_LDs[1], "%)", sep = "") #create axis labels for LD1 showing relative proportions of eigenvalues of discriminant functions in percent
LD2_label <- paste("LD2 (", dapc_kmeans_relative_LDs[2], "%)", sep = "") #create axis labels for LD2 showing relative proportions of eigenvalues of discriminant functions in percent
LD3_label <- paste("LD3 (", dapc_kmeans_relative_LDs[3], "%)", sep = "") #create axis labels for LD3 showing relative proportions of eigenvalues of discriminant functions in percent

svg("DAPC_kmeans_LD1_LD2_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD1, y = LD2)) + #kmeans DAPC scatterplot of LD1 vs LD2
  geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
  scale_shape_manual(values = shapes_pop_DAPC_kmeans) +
  scale_fill_manual(values = cols_pop_DAPC_kmeans) +
  labs(shape = population_assignment_name, fill = population_assignment_name, x = LD1_label, y = LD2_label) +
  theme_classic() +
  guides(override.aes = list(alpha = 1)) +
  coord_equal()
dev.off()

svg("DAPC_kmeans_LD1_LD3_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD1, y = LD3)) + #kmeans DAPC scatterplot of LD1 vs LD3
  geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
  scale_shape_manual(values = shapes_pop_DAPC_kmeans) +
  scale_fill_manual(values = cols_pop_DAPC_kmeans) +
  labs(shape = population_assignment_name, fill = population_assignment_name, x = LD1_label, y = LD3_label) +
  theme_classic() +
  guides(override.aes = list(alpha = 1))
dev.off()

svg("DAPC_kmeans_densi_LD1_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD1, fill = group)) + #density plot for LD1
  geom_density(alpha = density_plot_alpha) +
  geom_density(alpha = density_plot_alpha) +
  scale_fill_manual(values = cols_pop_DAPC_kmeans) +
  theme_void() +
  theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_flip()
dev.off()

svg("DAPC_kmeans_densi_LD2_pop_dataset.svg", width = plot_height, height = plot_height)
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD2, fill = group)) + #density plot for LD2
  geom_density(alpha = density_plot_alpha) +
  scale_fill_manual(values = cols_pop_DAPC_kmeans) +
  theme_void() +
  theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_flip()
dev.off()

svg("DAPC_kmeans_densi_LD3_pop_dataset.svg", width = plot_height, height = plot_height)
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD3, fill = group)) + #density plot for LD3
  geom_density(alpha = density_plot_alpha) +
  scale_fill_manual(values = cols_pop_DAPC_kmeans) +
  theme_void() +
  theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_flip()
dev.off()

It is then possible to combine the DAPC plots with the density plots (e.g., using Power Point)

plotly::plot_ly( #3D plot of DAPC_kmeans using LD1-LD3
  data = dapc_kmeans_df_pop_dataset,
  x = ~LD1, y = ~LD2, z = ~LD3,
  color = ~group, colors = cols_pop_DAPC_kmeans,
  type = 'scatter3d', mode = 'markers',
  alpha = point_alpha_3D_plot, #marker transparency
  marker = list(size = point_size_3D_plot)) #marker size
Plot DAPC kmeans with scaled LD axes

Plot the DAPC with scaled LD axes according to proportion of variance explained by each respective eigenvalue to ensure that distances between points in 3D plot reflect each discriminant function’s contribution

eig_proportions_dapc_kmeans <- dapc_result_pop_dataset_kmeans$eig / sum(dapc_result_pop_dataset_kmeans$eig) #calculate relative proportions of eigenvalues
dapc_kmeans_df_pop_dataset_scaled <- dapc_df_pop_dataset
dapc_kmeans_df_pop_dataset_scaled$LD1 <- dapc_kmeans_df_pop_dataset_scaled$LD1 * sqrt(eig_proportions_dapc_kmeans[1]) #scale LD1
dapc_kmeans_df_pop_dataset_scaled$LD2 <- dapc_kmeans_df_pop_dataset_scaled$LD2 * sqrt(eig_proportions_dapc_kmeans[2]) #scale LD2
dapc_kmeans_df_pop_dataset_scaled$LD3 <- dapc_kmeans_df_pop_dataset_scaled$LD3 * sqrt(eig_proportions_dapc_kmeans[3]) #scale LD3

ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD1, y = LD2)) + #kmeans DAPC scaled scatterplot of LD1 vs LD2
  geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
  scale_shape_manual(values = shapes_pop_DAPC_kmeans) +
  scale_fill_manual(values = cols_pop_DAPC_kmeans) +
  labs(shape = population_assignment_name, fill = population_assignment_name, x = paste("LD1 (scaled)"), y = paste("LD2 (scaled)")) +
  theme_classic() +
  coord_equal() +
  guides(override.aes = list(alpha = 1))

ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD1, y = LD3)) + #kmeans DAPC scaled scatterplot of LD1 vs LD2
  geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
  scale_shape_manual(values = shapes_pop_DAPC_kmeans) +
  scale_fill_manual(values = cols_pop_DAPC_kmeans) +
  labs(shape = population_assignment_name, fill = population_assignment_name, x = paste("LD1 (scaled)"), y = paste("LD3 (scaled)")) +
  theme_classic() +
  coord_equal() +
  guides(override.aes = list(alpha = 1))

plotly::plot_ly( #3D plot of DAPC_kmeans using scaled LD1-LD3
  data = dapc_kmeans_df_pop_dataset_scaled,
  x = ~LD1, y = ~LD2, z = ~LD3,
  color = ~Cluster, colors = cols_pop_DAPC_kmeans,
  type = 'scatter3d', mode = 'markers',
  alpha = point_alpha_3D_plot, marker = list(size = point_size_3D_plot)
) %>% layout(scene = list(xaxis = list(title = "LD1", range = c(min(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3))), max(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3))))),
                          yaxis = list(title = "LD2", range = c(min(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3))), max(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3))))),
                          zaxis = list(title = "LD3", range = c(min(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3))), max(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3)))))))
Create Structure-like plot showing cluster posterior probabilities for individuals across populations
dapc_kmeans_group_memberships <- as.data.frame(dapc_result_pop_dataset_kmeans$posterior) #convert posterior probabilities to data frame
dapc_kmeans_group_memberships$ID <- row.names(dapc_kmeans_group_memberships) #add ID column
dapc_kmeans_group_memberships$Population <- pop_dataset$pop[match(dapc_kmeans_group_memberships$ID, indNames(pop_dataset))] #match populations to IDs
dapc_kmeans_group_memberships_long <- melt(dapc_kmeans_group_memberships, id.vars = c("ID", "Population"), variable.name = "Cluster", value.name = "Probability") #reshape to long format

svg("DAPC_kmeans_Membership_probabilities.svg", width = 25/2.56, height = 25/2.56)
ggplot(dapc_kmeans_group_memberships_long, aes(x = Probability, y = ID, fill = Cluster)) + #create Structure-like plots 
  geom_bar(stat = "identity") + 
  theme_classic() +
  scale_fill_manual(values = cols_pop_DAPC_kmeans) + #use viridis color palette
  facet_wrap(~ Population, scales = "free_y") + #split y-axis by population
  labs(fill = "Cluster") +
  theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 3), axis.text.x = element_blank(), 
        axis.title.y = element_blank(), axis.title.x = element_blank(), panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), panel.background = element_blank(), plot.background = element_blank())
dev.off()

##### Assess allele differentiation
dapc_kmeans_loading_LD1 <- adegenet::loadingplot(dapc_result_pop_dataset_kmeans$var.contr[, 1], lab.jitter = 100, #plot loading for first LD axis 
                                                 thres = quantile(dapc_result_pop_dataset_kmeans$var.contr[, 1], 0.99)) #set threshold to 99th percentile (top 1% of loadings)

dapc_kmeans_loading_LD2 <- adegenet::loadingplot(dapc_result_pop_dataset_kmeans$var.contr[, 2], lab.jitter = 100, #plot loading for second LD axis 
                                                 thres = quantile(dapc_result_pop_dataset_kmeans$var.contr[, 2], 0.99)) #set threshold to 99th percentile (top 1% of loadings)

dapc_kmeans_loading_LD1$var.names #print most differentiated alleles for LD1
## [1] "ctg00000001_1_12521730.0" "ctg00000001_1_12521730.1"
## [3] "ctg00000003_1_4110250.0"  "ctg00000005_1_16585380.1"
## [5] "ctg00000012_1_13095659.0" "ctg00000012_1_13095659.1"
dapc_kmeans_loading_LD2$var.names #print most differentiated alleles for LD2
## [1] "ctg00000001_1_12416200.0" "ctg00000001_1_12416200.1"
## [3] "ctg00000003_1_2808894.0"  "ctg00000003_1_2808894.1" 
## [5] "ctg00000005_1_16183223.0" "ctg00000005_1_16183223.1"
intersect(dapc_kmeans_loading_LD1$var.names, dapc_kmeans_loading_LD2$var.names) #show differentiated alleles for both LD1 and LD2
## character(0)
Assess and visualize group memberships
dapc_kmeans_group_memberships <- as.data.frame(dapc_result_pop_dataset_kmeans$posterior) #convert posterior probabilities to data frame
subset_size <- 40 #define subset size
plot_assignments <- function(dapc_kmeans_result, subset_size, max_rows) { #plot group memberships (heat colors represent membership probabilities with red=1 and white=0, blue crosses represent prior Cluster_assignment provided to DAPC kmeans)
  num_plots <- ceiling(max_rows / subset_size) #calculate number of plots needed
  for (i in 0:(num_plots - 1)) {
    start_row <- i * subset_size + 1 #start index for subset
    end_row <- min((i + 1) * subset_size, max_rows) #end index for subset
    cat("Plotting rows", start_row, "to", end_row, "\n") #print status
    adegenet::assignplot(dapc_kmeans_result, subset = start_row:end_row)}} #plot assignments for current subset
plot_assignments(dapc_result_pop_dataset_kmeans, subset_size, nrow(dapc_kmeans_group_memberships)) #plot all subsets
## Plotting rows 1 to 40

## Plotting rows 41 to 80

## Plotting rows 81 to 120

## Plotting rows 121 to 160

## Plotting rows 161 to 163

Identify and plot admixed individuals
dapc_result_pop_dataset_kmeans$grp <- factor(dapc_result_pop_dataset_kmeans$grp, levels = unique(dapc_result_pop_dataset_kmeans$grp))
admixed_individuals <- which(apply(dapc_result_pop_dataset_kmeans$posterior, 1, function(e) all(e < 0.80))) #identify admixed individuals (posterior probabilities less than 0.80)
admixed_individuals #show admixed individuals
## H.m.maia_NY_DR95 
##              127
if (length(admixed_individuals) > 0) {adegenet::compoplot(dapc_result_pop_dataset_kmeans, col.pal = cols_pop_DAPC_kmeans, subset = admixed_individuals)} #plot admixed individuals

Assign individuals to clusters based on threshold
dapc_result_pop_dataset_kmeans_posterior <- as.data.frame(dapc_result_pop_dataset_kmeans$posterior)
dapc_result_pop_dataset_kmeans_posterior[sapply(dapc_result_pop_dataset_kmeans_posterior, is.numeric)] <- round(dapc_result_pop_dataset_kmeans_posterior[sapply(dapc_result_pop_dataset_kmeans_posterior, is.numeric)], 2) #round numeric columns
dapc_result_pop_dataset_kmeans_posterior$ID <- rownames(dapc_result_pop_dataset_kmeans_posterior) #add ID column
assign_clusters <- function(probs, threshold_high, threshold_low) { #define function to assign clusters
  high_probs <- which(probs > threshold_high)
  mid_probs <- which(probs > threshold_low & probs <= threshold_high)
  if (length(high_probs) == 1) {
    assigned_cluster <- as.character(high_probs) #single cluster assignment
  } else if (length(mid_probs) >= 2 & length(mid_probs) <= 3) {
    assigned_cluster <- paste(mid_probs, collapse = "+") #mixed cluster assignment
  } else {assigned_cluster <- admixed_individuals_cluster} #admixed assignment
  return(assigned_cluster)}
dapc_result_pop_dataset_kmeans_posterior$Assigned_cluster <- apply( #apply function to each cluster
  dapc_result_pop_dataset_kmeans_posterior[, 1:length(unique(levels(dapc_result_pop_dataset_kmeans$grp)))], 1, 
  assign_clusters, threshold_high = threshold_high, threshold_low = threshold_low)
dapc_kmeans_final_dataset <- left_join(dataset, dapc_result_pop_dataset_kmeans_posterior, by = "ID") #combine datasets
Create map with DAPC kmeans clusters
dapc_kmeans_dataset_map <- dapc_kmeans_final_dataset %>% tidyr::drop_na() #remove empty rows for plotting
DAPC_kmeans_map_color_palette <- viridis::viridis #set colorblind-friendly color palette
dapc_kmeans_dataset_map$Assigned_cluster <- factor(dapc_kmeans_dataset_map$Assigned_cluster, levels = unique(c(sort(as.character(dapc_kmeans_dataset_map$Assigned_cluster[stringr::str_detect(dapc_kmeans_dataset_map$Assigned_cluster, "^[0-9]+$")])), sort(dapc_kmeans_dataset_map$Assigned_cluster[stringr::str_detect(dapc_kmeans_dataset_map$Assigned_cluster, "^[0-9]+(\\+[0-9]+){1,2}$")]), admixed_individuals_cluster)))#reorder factor levels in dataset so that single clusters are first, followed by multiple clusters and admixed_individual_cluster
dapc_kmeans_dataset_map$Assigned_cluster <- droplevels(dapc_kmeans_dataset_map$Assigned_cluster)

svg("DAPC_kmeans_cluster_map.svg", width = DAPC_map_width, height = DAPC_map_height)
map_DAPC_cluster <- ggplot(data = dapc_kmeans_dataset_map, aes(x = Longitude, y = Latitude)) +
  theme_void() + #use theme_void for no background
  borders("world", colour = DAPC_map_country_border_color, fill = DAPC_map_filling_color, 
          linewidth = DAPC_map_country_border_thickness) +
  geom_polygon(data = map_data("state"), aes(x = long, y = lat, group = group), #add state borders (only for USA!)
               fill = NA, colour = DAPC_map_state_border_color, 
               linewidth = DAPC_map_state_border_thickness) +
  geom_point(aes(fill = Assigned_cluster, shape = Assigned_cluster), 
             alpha = DAPC_map_point_alpha, color = DAPC_map_point_outline_color, 
             size = DAPC_map_point_size) +
  geom_jitter(aes(fill = Assigned_cluster, shape = Assigned_cluster), 
              width = DAPC_map_point_jitter, height = DAPC_map_point_jitter, 
              alpha = DAPC_map_point_alpha, color = DAPC_map_point_outline_color, 
              size = DAPC_map_point_size) +
  coord_fixed(xlim = xlim_range, ylim = ylim_range) + #set coordinates with buffer
  scale_fill_manual(values = DAPC_kmeans_map_color_palette(n = length(levels(unique(dapc_kmeans_dataset_map$Assigned_cluster))), begin = 0, end = 1)) + #assign colors 
  scale_shape_manual(values = rep(c(21, 24, 22, 25), length.out = length(levels(unique(dapc_kmeans_dataset_map$Assigned_cluster))))) + #assign shapes
  guides(fill = guide_legend(override.aes = list(size = DAPC_map_point_size, alpha = 1)), 
         shape = guide_legend(override.aes = list(size = DAPC_map_point_size, alpha = 1))) + 
  labs(fill = "Cluster", shape = "Cluster") #add labels
suppressWarnings(print(map_DAPC_cluster)) #print map and suppress warnings
dev.off() #close svg device

Save assignment probabilities of DAPC as text file
names(dapc_kmeans_df_pop_dataset)[names(dapc_kmeans_df_pop_dataset) == "group"] <- "Cluster"
dapc_kmeans_df_pop_dataset$ID <- row.names(dapc_kmeans_df_pop_dataset)
dapc_kmeans_final_dataset <- left_join(dapc_kmeans_df_pop_dataset, dapc_kmeans_final_dataset, by = "ID")
write.table(dapc_kmeans_final_dataset[order(dapc_kmeans_final_dataset$ID), ], #sort by ID
            file = "DAPC_kmeans_final_dataset_assignment_probabilities_clusters", 
            sep = "\t", dec = ",", row.names = F, col.names = T, quote = F)
Plot DAPC results with assigned clusters (including mixed clusters)
dapc_kmeans_final_dataset <- droplevels(na.omit(dapc_kmeans_final_dataset)) #drop rows with NA values and remove unused factor levels
dapc_kmeans_final_dataset$Assigned_cluster <- factor(dapc_kmeans_final_dataset$Assigned_cluster, levels = c(unique(as.character(1:length(unique(dapc_kmeans_df_pop_dataset$Cluster)))), sort(unique(dapc_kmeans_final_dataset$Assigned_cluster[grep("\\+", dapc_kmeans_final_dataset$Assigned_cluster)])), admixed_individuals_cluster)) #reorder factor levels in dataset so that single clusters are first, followed by multiple clusters and admixed_individuals_cluster

ggplot(dapc_kmeans_final_dataset, aes(x = LD1, y = LD2)) + #DAPC scatterplot of LD1 vs LD2
  geom_point(size = point_size, alpha = point_alpha, 
             aes(shape = Assigned_cluster, fill = Assigned_cluster), color = point_outline_color) +
  scale_shape_manual(values =  rep(c(21, 22, 24, 25), length.out = length(levels(dapc_kmeans_final_dataset$Assigned_cluster)))) +
  scale_fill_manual(values = DAPC_map_color_palette(n = length(levels(dapc_kmeans_final_dataset$Assigned_cluster)), begin = 0, end = 1)) +
  labs(shape = population_assignment_name, fill = population_assignment_name) +
  theme_classic()

ggplot(dapc_kmeans_final_dataset, aes(x = LD1, y = LD3)) + #DAPC scatterplot of LD1 vs LD3
  geom_point(size = point_size, alpha = point_alpha, 
             aes(shape = Assigned_cluster, fill = Assigned_cluster), color = point_outline_color) +
  scale_shape_manual(values =  rep(c(21, 22, 24, 25), length.out = length(levels(dapc_kmeans_final_dataset$Assigned_cluster)))) +
  scale_fill_manual(values = DAPC_map_color_palette(n = length(levels(dapc_kmeans_final_dataset$Assigned_cluster)), begin = 0, end = 1)) +
  labs(shape = population_assignment_name, fill = population_assignment_name) +
  theme_classic()

plotly::plot_ly( #3D plot of DAPC using LD1-LD3
  data = dapc_kmeans_final_dataset,
  x = ~LD1, y = ~LD2, z = ~LD3,
  color = ~Assigned_cluster, 
  colors = DAPC_map_color_palette(n = length(levels(dapc_kmeans_final_dataset$Assigned_cluster)), begin = 0, end = 1),
  type = 'scatter3d', mode = 'markers',
  alpha = point_alpha_3D_plot, #marker transparency
  marker = list(size = point_size_3D_plot)) #marker size